├── alphatools ├── __init__.py ├── algo │ ├── __init__.py │ ├── utils.py │ └── risk.py ├── misc │ ├── __init__.py │ └── pair_trade.py ├── data │ ├── __init__.py │ ├── dat_file.pkl │ ├── sectors.npy │ ├── industries.npy │ ├── sector_names.csv │ ├── factory │ │ └── data_sources.json │ ├── factory.py │ └── industry_names.csv ├── expression │ ├── __init__.py │ ├── test.py │ ├── repro.py │ ├── expression.lark │ └── expression.py ├── fundamentals │ ├── __init__.py │ ├── make_fundamentals.py~ │ ├── make_fundamentals.py │ └── fundamentals.py ├── ics │ ├── sectors.npy │ ├── industries.npy │ ├── sic_major.npy │ ├── sic_subclass.npy │ ├── __init__.py │ ├── sic_specialize.npy │ ├── sector_names.csv │ ├── ics_scheme.py │ └── industry_names.csv ├── research │ ├── __init__.py │ └── research.py └── __main__.py ├── setup.cfg ├── MANIFEST.in ├── catboost_info ├── time_left.tsv ├── learn_error.tsv ├── learn │ └── events.out.tfevents ├── meta.tsv └── catboost_training.json ├── .DS_Store ├── ci └── test.py ├── tests ├── catboost_info │ ├── time_left.tsv │ ├── learn_error.tsv │ ├── learn │ │ └── events.out.tfevents │ ├── meta.tsv │ └── catboost_training.json ├── test_sklearn.py ├── test_catboost.py ├── test_lightgbm.py └── expressions.py ├── notebooks ├── .DS_Store ├── alpha9.png ├── Tree84.gv.pdf ├── Tree84.gv.png ├── Tree84.gv ├── pipeline-blaze-factory.ipynb ├── pipeline-minimal.ipynb ├── one_o_one_alphas.json ├── pipeline-blaze-minimal.ipynb └── model.txt ├── requirements_blaze_latest.txt ├── requirements_blaze_stable.txt ├── setup.py ├── .travis.yml ├── install_stable.sh ├── install_latest.sh ├── .gitignore ├── requirements_stable.txt ├── requirements_latest.txt ├── LICENSE └── README.md /alphatools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alphatools/algo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alphatools/misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include data/* 2 | recursive-include data * -------------------------------------------------------------------------------- /alphatools/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .factory import Factory 2 | -------------------------------------------------------------------------------- /catboost_info/time_left.tsv: -------------------------------------------------------------------------------- 1 | iter Passed Remaining 2 | 0 0 0 3 | 1 2 0 4 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/.DS_Store -------------------------------------------------------------------------------- /alphatools/expression/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .expression import ExpressionAlpha 2 | -------------------------------------------------------------------------------- /alphatools/fundamentals/__init__.py: -------------------------------------------------------------------------------- 1 | from .fundamentals import Fundamentals 2 | -------------------------------------------------------------------------------- /ci/test.py: -------------------------------------------------------------------------------- 1 | if __name__ == '__main__': 2 | print('Success!') 3 | 4 | -------------------------------------------------------------------------------- /catboost_info/learn_error.tsv: -------------------------------------------------------------------------------- 1 | iter RMSE 2 | 0 15.47309493 3 | 1 11.44676992 4 | -------------------------------------------------------------------------------- /tests/catboost_info/time_left.tsv: -------------------------------------------------------------------------------- 1 | iter Passed Remaining 2 | 0 57 57 3 | 1 59 0 4 | -------------------------------------------------------------------------------- /tests/catboost_info/learn_error.tsv: -------------------------------------------------------------------------------- 1 | iter RMSE 2 | 0 15.47309493 3 | 1 11.44676992 4 | -------------------------------------------------------------------------------- /notebooks/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/notebooks/.DS_Store -------------------------------------------------------------------------------- /notebooks/alpha9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/notebooks/alpha9.png -------------------------------------------------------------------------------- /alphatools/expression/test.py: -------------------------------------------------------------------------------- 1 | class MyClass(): 2 | y = 2 3 | def my_func(self): 4 | x = 2 5 | return(x) 6 | -------------------------------------------------------------------------------- /notebooks/Tree84.gv.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/notebooks/Tree84.gv.pdf -------------------------------------------------------------------------------- /notebooks/Tree84.gv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/notebooks/Tree84.gv.png -------------------------------------------------------------------------------- /alphatools/ics/sectors.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/alphatools/ics/sectors.npy -------------------------------------------------------------------------------- /alphatools/data/dat_file.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/alphatools/data/dat_file.pkl -------------------------------------------------------------------------------- /alphatools/data/sectors.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/alphatools/data/sectors.npy -------------------------------------------------------------------------------- /alphatools/ics/industries.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/alphatools/ics/industries.npy -------------------------------------------------------------------------------- /alphatools/ics/sic_major.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/alphatools/ics/sic_major.npy -------------------------------------------------------------------------------- /alphatools/data/industries.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/alphatools/data/industries.npy -------------------------------------------------------------------------------- /alphatools/ics/sic_subclass.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/alphatools/ics/sic_subclass.npy -------------------------------------------------------------------------------- /alphatools/ics/__init__.py: -------------------------------------------------------------------------------- 1 | from .ics_scheme import ( 2 | SICMajorIndustry, 3 | Sector, 4 | SubIndustry 5 | ) 6 | -------------------------------------------------------------------------------- /alphatools/ics/sic_specialize.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/alphatools/ics/sic_specialize.npy -------------------------------------------------------------------------------- /catboost_info/learn/events.out.tfevents: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/catboost_info/learn/events.out.tfevents -------------------------------------------------------------------------------- /tests/catboost_info/learn/events.out.tfevents: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefan-jansen/alphatools/HEAD/tests/catboost_info/learn/events.out.tfevents -------------------------------------------------------------------------------- /catboost_info/meta.tsv: -------------------------------------------------------------------------------- 1 | name experiment 2 | iterCount 2 3 | learnErrorLog learn_error.tsv 4 | testErrorLog test_error.tsv 5 | timeLeft time_left.tsv 6 | loss RMSE min 7 | -------------------------------------------------------------------------------- /tests/catboost_info/meta.tsv: -------------------------------------------------------------------------------- 1 | name experiment 2 | iterCount 2 3 | learnErrorLog learn_error.tsv 4 | testErrorLog test_error.tsv 5 | timeLeft time_left.tsv 6 | loss RMSE min 7 | -------------------------------------------------------------------------------- /alphatools/ics/sector_names.csv: -------------------------------------------------------------------------------- 1 | 0,Healthcare 2 | 1,Basic Materials 3 | 2,Services 4 | 3,Financial 5 | 4,Technology 6 | 5,Industrial Goods 7 | 6,Consumer Goods 8 | 7,Utilities 9 | 8,Conglomerates 10 | 
-------------------------------------------------------------------------------- /alphatools/data/sector_names.csv: -------------------------------------------------------------------------------- 1 | 0,Healthcare 2 | 1,Basic Materials 3 | 2,Services 4 | 3,Financial 5 | 4,Technology 6 | 5,Industrial Goods 7 | 6,Consumer Goods 8 | 7,Utilities 9 | 8,Conglomerates 10 | -------------------------------------------------------------------------------- /alphatools/data/factory/data_sources.json: -------------------------------------------------------------------------------- 1 | { 2 | "sample": { 3 | "url": "/Users/jonathan/devwork/alphatools/alphatools/data/factory/sample.csv", 4 | "schema": "var*{asof_date: datetime, sid: int64, value: float64}" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /alphatools/research/__init__.py: -------------------------------------------------------------------------------- 1 | from .research import ( 2 | loaders, 3 | blaze_loader, 4 | run_pipeline, 5 | get_pricing, 6 | get_symbols, 7 | make_factor_plot, 8 | make_quantile_plot, 9 | set_bundle 10 | ) 11 | -------------------------------------------------------------------------------- /alphatools/algo/utils.py: -------------------------------------------------------------------------------- 1 | from logbook import Logger, StderrHandler, DEBUG, INFO 2 | 3 | log_handler = StderrHandler( 4 | format_string='[{record.time:%Y-%m-%d %H:%M:%S.%f}]: ' + 5 | '{record.level_name}: {record.func_name}: {record.message}', 6 | level=INFO 7 | ) 8 | log_handler.push_application() 9 | log = Logger('Algorithm') 10 | -------------------------------------------------------------------------------- /catboost_info/catboost_training.json: -------------------------------------------------------------------------------- 1 | { 2 | "meta":{"launch_mode":"Train","name":"experiment","iteration_count":2,"learn_metrics":[{"best_value":"Min","name":"RMSE"}],"test_sets":[],"test_metrics":[],"learn_sets":["learn"]}, 3 | "iterations":[ 4 | {"learn":[15.47309493],"iteration":0,"passed_time":0.0009688483709,"remaining_time":0.0009688483709}, 5 | {"learn":[11.44676992],"iteration":1,"passed_time":0.002063907508,"remaining_time":0} 6 | ]} -------------------------------------------------------------------------------- /tests/catboost_info/catboost_training.json: -------------------------------------------------------------------------------- 1 | { 2 | "meta":{"launch_mode":"Train","name":"experiment","iteration_count":2,"learn_metrics":[{"best_value":"Min","name":"RMSE"}],"test_sets":[],"test_metrics":[],"learn_sets":["learn"]}, 3 | "iterations":[ 4 | {"learn":[15.47309493],"iteration":0,"passed_time":0.05778349878,"remaining_time":0.05778349878}, 5 | {"learn":[11.44676992],"iteration":1,"passed_time":0.05900830356,"remaining_time":0} 6 | ]} -------------------------------------------------------------------------------- /tests/test_sklearn.py: -------------------------------------------------------------------------------- 1 | from sklearn import tree 2 | from sklearn.datasets import load_iris 3 | from sklearn.metrics import accuracy_score 4 | from sklearn.model_selection import train_test_split 5 | import numpy as np 6 | 7 | def test_tree(): 8 | iris = load_iris() 9 | X = iris.data 10 | y = iris.target 11 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) 12 | 13 | clf = tree.DecisionTreeClassifier() 14 | clf.fit(X_train, y_train) 15 | preds = clf.predict(X_test) 16 | assert 
np.allclose(accuracy_score(y_test, preds), 0.973684210526) 17 | -------------------------------------------------------------------------------- /requirements_blaze_latest.txt: -------------------------------------------------------------------------------- 1 | -e git://github.com/quantopian/datashape.git@bf06a41dc0908baf7c324aeacadba8820468ee78#egg=datashape-dev 2 | -e git://github.com/quantopian/odo.git@da7f26d87702f5d293763e8ed54c7e25fd3af38#egg=odo-dev 3 | 4 | # Keep cytoolz version in sync with toolz version in requirements.txt 5 | cytoolz==0.8.2 6 | 7 | # Transitive dependencies of blaze: 8 | dask[dataframe]==0.17.1 9 | partd==0.3.7 10 | locket==0.2.0 11 | cloudpickle==0.2.1 12 | itsdangerous==0.24 13 | flask==0.12.4 14 | flask-cors==2.1.3 15 | Jinja2==2.9.6 16 | MarkupSafe==0.23 17 | Werkzeug==0.10.4 18 | psutil==4.3.0 19 | 20 | -e git://github.com/quantopian/blaze.git@310605323449e375e81a0cf04011c507cd126ef6#egg=blaze-dev 21 | -------------------------------------------------------------------------------- /requirements_blaze_stable.txt: -------------------------------------------------------------------------------- 1 | -e git://github.com/quantopian/datashape.git@bf06a41dc0908baf7c324aeacadba8820468ee78#egg=datashape-dev 2 | -e git://github.com/quantopian/odo.git@da7f26d87702f5d293763e8ed54c7e25fd3af38#egg=odo-dev 3 | 4 | # Keep cytoolz version in sync with toolz version in requirements.txt 5 | cytoolz==0.8.2 6 | 7 | # Transitive dependencies of blaze: 8 | dask[dataframe]==0.13.0 9 | partd==0.3.7 10 | locket==0.2.0 11 | cloudpickle==0.2.1 12 | itsdangerous==0.24 13 | flask==0.12.4 14 | flask-cors==2.1.3 15 | Jinja2==2.9.6 16 | MarkupSafe==0.23 17 | Werkzeug==0.10.4 18 | psutil==4.3.0 19 | 20 | -e git://github.com/quantopian/blaze.git@310605323449e375e81a0cf04011c507cd126ef6#egg=blaze-dev 21 | -------------------------------------------------------------------------------- /alphatools/__main__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from alphatools.ics.ics_scheme import make_sector_classifier 3 | 4 | import click 5 | import subprocess 6 | import sys 7 | 8 | from os import path 9 | import zipline 10 | 11 | this_path = path.dirname(__file__) 12 | 13 | @click.group() 14 | def main(): 15 | pass 16 | 17 | @main.command() 18 | def get_blaze(): 19 | req = path.join(this_path, 'misc/requirements_blaze.txt') 20 | print(req) 21 | subprocess.call([sys.executable, "-m", "pip", "install", "-r", req]) 22 | 23 | @main.command() 24 | def ingest(): 25 | print('mapping sectors and industries...') 26 | make_sector_classifier() 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='alphatools', 5 | version='0.15', 6 | description='Quant finance research tools', 7 | author='Jonathan Larkin', 8 | author_email='jonathan.r.larkin@gmail.com', 9 | url = "https://github.com/marketneutral/alphatools", 10 | download_url = "https://github.com/marketneutral/alphatools/archive/0.11.tar.gz", 11 | packages=find_packages(), 12 | python_requires='>=3.5', 13 | install_requires=[ 14 | 'zipline<=1.3', 15 | 'alphalens', 16 | 'ipykernel', 17 | 'lark-parser', 18 | 'autopep8', 19 | 'bottleneck', 20 | 'tqdm',
'pydot' 22 | ], 23 | entry_points={ 24 | 'console_scripts': [ 25 | 'alphatools = alphatools.__main__:main', 26 | ] 27 | } 28 | ) 29 | -------------------------------------------------------------------------------- /tests/test_catboost.py: -------------------------------------------------------------------------------- 1 | from catboost import CatBoostRegressor 2 | import numpy as np 3 | 4 | def test_catboost(): 5 | # Initialize data 6 | cat_features = [0, 1, 2] 7 | train_data = [ 8 | ["a", "b", 1, 4, 5, 6], 9 | ["a", "b", 4, 5, 6, 7], 10 | ["c", "d", 30, 40, 50, 60] 11 | ] 12 | test_data = [ 13 | ["a", "b", 2, 4, 6, 8], 14 | ["a", "d", 1, 4, 50, 60] 15 | ] 16 | train_labels = [10, 20, 30] 17 | # Initialize CatBoostRegressor 18 | model = CatBoostRegressor( 19 | iterations=2, 20 | learning_rate=1, 21 | depth=2, 22 | random_seed=100 23 | ) 24 | # Fit model 25 | model.fit(train_data, train_labels, cat_features) 26 | # Get predictions 27 | preds = model.predict(test_data) 28 | print(preds) 29 | # compare with a tolerance; float predictions can differ in the last bits across platforms 30 | assert np.allclose(preds, np.array([9.6, 9.6])) 31 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | 4 | python: 5 | - 3.5 6 | - 3.6 7 | 8 | before_install: 9 | - export PYTHON_VERSION=$TRAVIS_PYTHON_VERSION 10 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 11 | - bash miniconda.sh -b -p $HOME/miniconda 12 | - export PATH="$HOME/miniconda/bin:$PATH" 13 | - export MKL_THREADING_LAYER=GNU 14 | - hash -r 15 | - conda config --set always_yes yes --set changeps1 no 16 | - conda update -q conda 17 | - conda info -a 18 | 19 | install: 20 | - if [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then 21 | ./install_stable.sh; source activate env_alphatools_stable; 22 | else 23 | ./install_latest.sh; source activate env_alphatools_latest; 24 | fi 25 | - zipline ingest -b quantopian-quandl 26 | - pip install nose 27 | # - alphatools ingest 28 | 29 | script: 30 | - python ./ci/test.py 31 | - cd tests 32 | - nosetests -v -------------------------------------------------------------------------------- /notebooks/Tree84.gv: -------------------------------------------------------------------------------- 1 | digraph Tree84 { 2 | split0 [label="split_feature_name: f19\nthreshold: -2.0784999999999996"] 3 | leaf0 [label="leaf_index: 0\nleaf_value: 0.008256563487728793"] 4 | split0 -> leaf0 [label="<="] 5 | split1 [label="split_feature_name: f19\nthreshold: 0.025500000000000005"] 6 | split2 [label="split_feature_name: f2\nthreshold: -2.2624999999999997"] 7 | leaf1 [label="leaf_index: 1\nleaf_value: 0.028751267421813238"] 8 | split2 -> leaf1 [label="<="] 9 | split3 [label="split_feature_name: f25\nthreshold: 1.2685000000000002"] 10 | leaf3 [label="leaf_index: 3\nleaf_value: -0.0027176377740919895"] 11 | split3 -> leaf3 [label="<="] 12 | leaf4 [label="leaf_index: 4\nleaf_value: 0.0032094788377135734"] 13 | split3 -> leaf4 [label=">"] 14 | split2 -> split3 [label=">"] 15 | split1 -> split2 [label="<="] 16 | leaf2 [label="leaf_index: 2\nleaf_value: 0.0012024763815209393"] 17 | split1 -> leaf2 [label=">"] 18 | split0 -> split1 [label=">"] 19 | } 20 | -------------------------------------------------------------------------------- /alphatools/data/factory.py: -------------------------------------------------------------------------------- 1 | import blaze as bz 2 | import json 3 | from datashape import dshape 4 |
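# Build a zipline pipeline DataSet for each entry in factory/data_sources.json; every entry supplies a blaze-readable "url" (environment variables are expanded below) and a datashape "schema" string; see the bundled "sample" source for the expected format.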
from alphatools.research import loaders, blaze_loader 6 | from zipline.data import bundles 7 | from zipline.utils.calendars import get_calendar 8 | from zipline.pipeline.loaders.blaze import from_blaze 9 | 10 | from os import path 11 | 12 | this_file = path.dirname(__file__) 13 | 14 | with open(path.join(this_file, 'factory/data_sources.json')) as f: 15 | data_sources = json.load(f) 16 | 17 | Factory = {} 18 | 19 | for source in data_sources.keys(): 20 | loc = data_sources[source]['url'] 21 | shape = dshape(data_sources[source]['schema']) 22 | 23 | loc = path.expandvars(loc) 24 | 25 | expr = bz.data( 26 | loc, 27 | dshape=shape 28 | ) 29 | 30 | # create the DataSet and register it under the source name 31 | ds = from_blaze( 32 | expr, 33 | no_deltas_rule='ignore', 34 | no_checkpoints_rule='ignore', 35 | loader=blaze_loader 36 | ) 37 | Factory[source] = ds 38 | -------------------------------------------------------------------------------- /alphatools/fundamentals/make_fundamentals.py~: -------------------------------------------------------------------------------- 1 | from zipline.data import bundles 2 | from zipline.pipeline import Pipeline 3 | from zipline.pipeline.data import USEquityPricing 4 | from zipline.pipeline.data import Column 5 | from zipline.pipeline.data import DataSet 6 | from zipline.pipeline.engine import SimplePipelineEngine 7 | from zipline.pipeline.filters import StaticAssets 8 | from zipline.pipeline.loaders import USEquityPricingLoader 9 | from zipline.pipeline.loaders.frame import DataFrameLoader 10 | from zipline.utils.calendars import get_calendar 11 | 12 | import numpy as np 13 | import pandas as pd 14 | 15 | trading_calendar = get_calendar('NYSE') 16 | bundle_data = bundles.load('quandl') 17 | 18 | data_file = '/Users/jonathan/finnd/workspace_data/marketcap_pb_ps_pe_sector_s&p_comp-sharadar/data.csv' 19 | 20 | df = pd.read_csv(data_file)#, nrows=1000) 21 | df['Date'] = pd.to_datetime(df['Date']) 22 | 23 | df['sid'] = np.nan 24 | df = df.set_index('Date') 25 | 26 | df.index = df.index.tz_localize('UTC') 27 | 28 | dates = df.index.unique() 29 | 30 | for day in dates: 31 | file_tickers = df.loc[day]['Ticker'] 32 | sids = [] 33 | for ticker in file_tickers: 34 | try: 35 | this_ticker = bundle_data.asset_finder.lookup_symbol(ticker, as_of_date=day) 36 | this_sid = this_ticker.sid 37 | except Exception: 38 | this_sid = np.nan 39 | sids.append(this_sid) 40 | df.loc[day, 'sid'] = sids 41 | 42 | df.sid = df.sid.astype(float) 43 | df = df.dropna() 44 | df.sid = df.sid.astype(int) 45 | -------------------------------------------------------------------------------- /install_stable.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | conda create -n env_alphatools_stable -y -c conda-forge python=3.5.5 numpy=1.11.3 pandas=0.18.1 scipy=0.17.1 libgfortran=3.0 mkl-service pymc3=3.1 lightgbm=2.2.0 scikit-optimize=0.5.2 scikit-learn lapack catboost pip 4 | source activate env_alphatools_stable 5 | python -m pip install -r requirements_stable.txt --no-cache-dir 6 | python -m pip install -r requirements_blaze_stable.txt --no-cache-dir 7 | pip install cvxpy==0.4.10 --no-cache-dir --no-binary :all: 8 | pip install zipline==1.3.0 --no-cache-dir 9 | pip install ipykernel --no-cache-dir 10 | pip install alphalens==0.3.2 --no-cache-dir 11 | pip install pyfolio --no-cache-dir 12 | pip install graphviz==0.9 13 | cd ..
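# NB: assumes the repository was cloned into a directory literally named "alphatools"; after the cd above, the editable install below points at that directory.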
14 | pip install -e alphatools --no-cache-dir 15 | python -m ipykernel install --user --name env_alphatools_stable --display-name "Python 3.5 (env_alphatools_stable)" 16 | conda install -y pytorch=0.4.1 torchvision -c pytorch 17 | 18 | source ~/.bashrc 19 | # must append to .bashrc 20 | if [ "$MKL_THREADING_LAYER" = "" ] 21 | then 22 | export MKL_THREADING_LAYER=GNU 23 | echo 'export MKL_THREADING_LAYER=GNU' >> ~/.bashrc 24 | echo 'Please source the .bashrc file to activate MKL_THREADING env variable.' 25 | fi 26 | 27 | if [ "$THEANO_FLAGS" = "" ] 28 | then 29 | # Needed for Mac OS X 30 | export "THEANO_FLAGS='gcc.cxxflags=-Wno-c++11-narrowing'" 31 | echo "export \"THEANO_FLAGS='gcc.cxxflags=-Wno-c++11-narrowing'\"" >> ~/.bashrc 32 | echo 'Please source the .bashrc file to activate the THEANO_FLAGS env variable.' 33 | fi 34 | -------------------------------------------------------------------------------- /alphatools/expression/repro.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import itertools 3 | from lark import Lark, Transformer 4 | 5 | grammar = r""" 6 | 7 | ?value: "(" value ")" 8 | | mylocalvar 9 | | add 10 | | SIGNED_NUMBER -> number 11 | 12 | mylocalvar: "mylocalvar" 13 | add: value "+" value 14 | number: SIGNED_NUMBER 15 | 16 | %import common.SIGNED_NUMBER 17 | %import common.WS 18 | %ignore WS 19 | 20 | """ 21 | 22 | stack = [] 23 | 24 | class MyTransformer(Transformer): 25 | vcounter = itertools.count() 26 | 27 | def __init__(self): 28 | stack = [] 29 | self.cmdlist = [] 30 | 31 | def number(self, items): 32 | stack.append(items[0].value) 33 | 34 | def mylocalvar(self, items): 35 | stack.append('mylocalvar') 36 | 37 | def add(self, items): 38 | term2 = stack.pop() 39 | term1 = stack.pop() 40 | thisv = next(self.vcounter) 41 | stack.append('v' + str(thisv)) 42 | self.cmdlist.append( 43 | 'v' + str(thisv) + ' = ' + term1 + ' + ' + term2 44 | ) 45 | 46 | def transform(self, tree): 47 | self._transform_tree(tree) 48 | v1 = stack.pop() 49 | self.cmdlist.append( 50 | 'out[:] = ' + v1 + '[-1]' 51 | ) 52 | 53 | return self.cmdlist 54 | 55 | 56 | my_parser = Lark(grammar, start='value') 57 | 58 | 59 | text = "mylocalvar + mylocalvar" 60 | text = "mylocalvar + 2.5" 61 | text = "2 + 2" 62 | tree = my_parser.parse(text) 63 | npcmds = MyTransformer().transform(tree) 64 | 65 | 66 | -------------------------------------------------------------------------------- /install_latest.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | conda create -n env_alphatools_latest -y -c conda-forge python=$PYTHON_VERSION numpy=1.14.1 pandas=0.22.0 scipy=1.0.0 libgfortran=3.0 mkl-service pymc3=3.5 lightgbm=2.2.0 scikit-optimize=0.5.2 scikit-learn catboost pip 4 | source activate env_alphatools_latest 5 | python -m pip install -r requirements_latest.txt --no-cache-dir 6 | python -m pip install -r requirements_blaze_latest.txt --no-cache-dir 7 | pip install cvxpy==1.0.9 --no-cache-dir 8 | pip install zipline==1.3.0 --no-cache-dir 9 | pip install statsmodels==0.9.0 --upgrade --no-cache-dir 10 | pip install alphalens==0.3.2 --no-cache-dir 11 | pip install pyfolio --no-cache-dir 12 | pip install ipykernel --no-cache-dir 13 | pip install graphviz==0.9 14 | conda install -y -c pytorch pytorch-nightly-cpu 15 | conda install -y -c fastai torchvision-nightly-cpu 16 | conda install -y -c fastai fastai 17 | cd ..
18 | pip install -e alphatools --no-cache-dir 19 | python -m ipykernel install --user --name env_alphatools_latest --display-name "env_alphatools_latest" 20 | 21 | source ~/.bashrc 22 | # must append to .bashrc 23 | if [ "$MKL_THREADING_LAYER" = "" ] 24 | then 25 | export MKL_THREADING_LAYER=GNU 26 | echo 'export MKL_THREADING_LAYER=GNU' >> ~/.bashrc 27 | echo 'Please source the .bashrc file to activate MKL_THREADING env variable.' 28 | fi 29 | 30 | if [ "$THEANO_FLAGS" = "" ] 31 | then 32 | # Needed for Mac OS X 33 | export "THEANO_FLAGS='gcc.cxxflags=-Wno-c++11-narrowing'" 34 | echo "export \"THEANO_FLAGS='gcc.cxxflags=-Wno-c++11-narrowing'\"" >> ~/.bashrc 35 | echo 'Please source the .bashrc file to activate the THEANO_FLAGS env variable.' 36 | fi 37 | -------------------------------------------------------------------------------- /alphatools/fundamentals/make_fundamentals.py: -------------------------------------------------------------------------------- 1 | from zipline.data import bundles 2 | from zipline.pipeline import Pipeline 3 | from zipline.pipeline.data import USEquityPricing 4 | from zipline.pipeline.data import Column 5 | from zipline.pipeline.data import DataSet 6 | from zipline.pipeline.engine import SimplePipelineEngine 7 | from zipline.pipeline.filters import StaticAssets 8 | from zipline.pipeline.loaders import USEquityPricingLoader 9 | from zipline.pipeline.loaders.frame import DataFrameLoader 10 | from zipline.utils.calendars import get_calendar 11 | 12 | import numpy as np 13 | import pandas as pd 14 | 15 | trading_calendar = get_calendar('NYSE') 16 | bundle_data = bundles.load('quandl') 17 | 18 | data_file = 'path/to/sharadar/data' 19 | 20 | df = pd.read_csv(data_file)#, nrows=1000) 21 | df['Date'] = pd.to_datetime(df['Date']) 22 | 23 | df['sid'] = np.nan 24 | df = df.set_index('Date') 25 | 26 | df.index = df.index.tz_localize('UTC') 27 | 28 | dates = df.index.unique() 29 | 30 | for day in dates: 31 | file_tickers = df.loc[day]['Ticker'] 32 | sids = [] 33 | for ticker in file_tickers: 34 | try: 35 | this_ticker = bundle_data.asset_finder.lookup_symbol(ticker, as_of_date=day) 36 | this_sid = this_ticker.sid 37 | except Exception: 38 | this_sid = np.nan 39 | sids.append(this_sid) 40 | df.loc[day, 'sid'] = sids 41 | 42 | df.sid = df.sid.astype(float) 43 | df = df.dropna() 44 | df.sid = df.sid.astype(int) 45 | 46 | 47 | df['MarketCap'] = pd.to_numeric(df['MarketCap'], errors='coerce') 48 | df['P/B'] = pd.to_numeric(df['P/B'], errors='coerce') 49 | df['P/S'] = pd.to_numeric(df['P/S'], errors='coerce') 50 | df['P/E'] = pd.to_numeric(df['P/E'], errors='coerce') 51 | 52 | # save the df 53 | df.to_pickle('sharadar_with_sid.pkl') 54 | -------------------------------------------------------------------------------- /alphatools/fundamentals/fundamentals.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from alphatools.research import loaders 3 | from zipline.pipeline.data import Column 4 | from zipline.pipeline.data import DataSet 5 | from zipline.pipeline.loaders.frame import DataFrameLoader 6 | 7 | from os import path 8 | myfile_path = path.join(path.dirname(__file__), 'myfile.txt') 9 | 10 | df = pd.read_pickle(path.join(path.dirname(__file__), 'sharadar_with_sid.pkl')) 11 | 12 | MarketCap_frame = ( 13 | df[['MarketCap', 'sid']]. 14 | reset_index().set_index(['Date', 'sid']).
15 | unstack() 16 | ) 17 | 18 | MarketCap_frame.columns = MarketCap_frame.columns.droplevel() 19 | 20 | PriceToBook_frame = df[['P/B', 'sid']].reset_index().set_index(['Date', 'sid']).unstack() 21 | PriceToBook_frame.columns = PriceToBook_frame.columns.droplevel() 22 | 23 | PriceToSales_frame = df[['P/S', 'sid']].reset_index().set_index(['Date', 'sid']).unstack() 24 | PriceToSales_frame.columns = PriceToSales_frame.columns.droplevel() 25 | 26 | PriceToEarnings_frame = df[['P/E', 'sid']].reset_index().set_index(['Date', 'sid']).unstack() 27 | PriceToEarnings_frame.columns = PriceToEarnings_frame.columns.droplevel() 28 | 29 | class Fundamentals(DataSet): 30 | MarketCap = Column(dtype=float) 31 | PriceToBook = Column(dtype=float) 32 | PriceToSales = Column(dtype=float) 33 | PriceToEarnings = Column(dtype=float) 34 | 35 | # register the loaders 36 | loaders[Fundamentals.MarketCap] = DataFrameLoader(Fundamentals.MarketCap, MarketCap_frame) 37 | loaders[Fundamentals.PriceToBook] = DataFrameLoader(Fundamentals.PriceToBook, PriceToBook_frame) 38 | loaders[Fundamentals.PriceToSales] = DataFrameLoader(Fundamentals.PriceToSales, PriceToSales_frame) 39 | loaders[Fundamentals.PriceToEarnings] = DataFrameLoader(Fundamentals.PriceToEarnings, PriceToEarnings_frame) 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # other 107 | *.pkl 108 | *.pickle 109 | *.*~ 110 | *~ 111 | *.db 112 | src 113 | alpha*.png 114 | .DS_Store -------------------------------------------------------------------------------- /requirements_stable.txt: -------------------------------------------------------------------------------- 1 | # Incompatible with earlier PIP versions 2 | pip>=7.1.0 3 | # bcolz fails to install if this is not in the build_requires. 4 | setuptools>18.0 5 | 6 | # Logging 7 | Logbook==0.12.5 8 | 9 | # Scientific Libraries 10 | 11 | pytz==2016.4 12 | numpy==1.11.3 13 | 14 | # for pandas-datareader 15 | requests-file==1.4.1 16 | 17 | # scipy and pandas are required for statsmodels, 18 | # statsmodels in turn is required for some pandas packages 19 | scipy==0.17.1 20 | pandas==0.18.1 21 | pandas-datareader==0.2.1 22 | # Needed for parts of pandas.stats 23 | patsy==0.4.0 24 | statsmodels==0.6.1 25 | 26 | python-dateutil==2.4.2 27 | six==1.10.0 28 | 29 | # For fetching remote data 30 | requests==2.9.1 31 | 32 | Cython==0.25.2 33 | 34 | # faster OrderedDict 35 | cyordereddict==0.2.2 36 | 37 | # faster array ops. 38 | bottleneck==1.0.0 39 | 40 | contextlib2==0.4.0 41 | 42 | # networkx requires decorator 43 | decorator==4.0.0 44 | 45 | # Graph algorithms used by zipline.pipeline 46 | networkx==1.9.1 47 | 48 | # NumericalExpression pipeline terms. 49 | numexpr==2.6.1 50 | 51 | # On disk storage format for pipeline data. 52 | bcolz==0.12.1 53 | 54 | # On disk storage format for pricing data. 55 | h5py==2.7.1 56 | 57 | # Command line interface helper 58 | click==4.0.0 59 | 60 | # FUNctional programming utilities 61 | toolz==0.8.2 62 | multipledispatch==0.6.0 63 | 64 | # for alembic 65 | MarkupSafe==0.23 66 | Mako==1.0.1 67 | # Asset writer and finder 68 | sqlalchemy==1.0.8 69 | # For asset db management 70 | alembic==0.7.7 71 | 72 | sortedcontainers==1.4.4 73 | # for intervaltree 74 | intervaltree==2.1.0 75 | 76 | # for caching 77 | lru-dict==1.1.4 78 | 79 | # For financial risk calculations 80 | empyrical==0.5.0 81 | 82 | tables==3.4.3 83 | 84 | # For trading calendars 85 | trading-calendars==1.2.0 86 | 87 | # Interface definitions.
88 | python-interface==1.4.0 89 | 90 | # Country Codes 91 | iso3166==0.9 92 | -------------------------------------------------------------------------------- /requirements_latest.txt: -------------------------------------------------------------------------------- 1 | # Incompatible with earlier PIP versions 2 | pip>=7.1.0 3 | # bcolz fails to install if this is not in the build_requires. 4 | setuptools>18.0 5 | 6 | # Logging 7 | Logbook==0.12.5 8 | 9 | # Scientific Libraries 10 | 11 | pytz==2016.4 12 | numpy==1.14.1 13 | 14 | # for pandas-datareader 15 | requests-file==1.4.1 16 | 17 | # scipy and pandas are required for statsmodels, 18 | # statsmodels in turn is required for some pandas packages 19 | scipy==1.0.0 20 | pandas==0.22.0 21 | pandas-datareader==0.4.0 22 | # Needed for parts of pandas.stats 23 | patsy==0.4.0 24 | 25 | # Q has statsmodels==0.6.1; I think this is incompatible with scipy==1.0.0 26 | statsmodels==0.9.0 27 | 28 | python-dateutil==2.4.2 29 | six==1.10.0 30 | 31 | # For fetching remote data 32 | requests==2.9.1 33 | 34 | Cython==0.25.2 35 | 36 | # faster OrderedDict 37 | cyordereddict==0.2.2 38 | 39 | # faster array ops. 40 | bottleneck==1.0.0 41 | 42 | contextlib2==0.4.0 43 | 44 | # networkx requires decorator 45 | decorator==4.0.0 46 | 47 | # Graph algorithms used by zipline.pipeline 48 | networkx==1.9.1 49 | 50 | # NumericalExpression pipeline terms. 51 | numexpr==2.6.1 52 | 53 | # On disk storage format for pipeline data. 54 | bcolz==0.12.1 55 | 56 | # On disk storage format for pricing data. 57 | h5py==2.7.1 58 | 59 | # Command line interface helper 60 | click==4.0.0 61 | 62 | # FUNctional programming utilities 63 | toolz==0.8.2 64 | multipledispatch==0.6.0 65 | 66 | # for alembic 67 | MarkupSafe==0.23 68 | Mako==1.0.1 69 | # Asset writer and finder 70 | sqlalchemy==1.0.8 71 | # For asset db management 72 | alembic==0.7.7 73 | 74 | sortedcontainers==1.4.4 75 | # for intervaltree 76 | intervaltree==2.1.0 77 | 78 | # for caching 79 | lru-dict==1.1.4 80 | 81 | # For financial risk calculations 82 | empyrical==0.5.0 83 | 84 | tables==3.4.3 85 | 86 | # For trading calendars 87 | trading-calendars==1.2.0 88 | 89 | # Interface definitions. 90 | python-interface==1.4.0 91 | 92 | # Country Codes 93 | iso3166==0.9 94 | -------------------------------------------------------------------------------- /alphatools/algo/risk.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def L1_risk(wgts, returns): 5 | """ 6 | Returns the "L1" risk as per Konno & Yamazaki; aka the mean-absolute- 7 | deviation. 8 | """ 9 | rets = returns.fillna(0.0).as_matrix() 10 | return np.mean(np.abs(rets.dot(wgts))) 11 | 12 | def value_at_risk( 13 | weights, 14 | returns, 15 | alpha=0.95): 16 | """ 17 | Returns the historical simulation VaR at the confidence threshold. 18 | """ 19 | returns = returns.fillna(0.0).as_matrix() 20 | portfolio_returns = returns.dot(weights) 21 | return np.percentile(portfolio_returns, 100 * (1-alpha)) 22 | 23 | def expected_shortfall( 24 | weights, 25 | returns, 26 | alpha=0.95): 27 | """ 28 | Returns the historical simulation CVaR at the confidence threshold. 
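Computed as the mean of the portfolio returns that fall below the VaR cutoff at the same confidence level.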
29 | """ 30 | 31 | var = value_at_risk(weights, returns, alpha) 32 | returns = returns.fillna(0.0) 33 | portfolio_returns = returns.dot(weights) 34 | return np.nanmean(portfolio_returns[portfolio_returns < var]) 35 | 36 | 37 | def calc_portfolio_risk( 38 | context, 39 | data, 40 | risk_func, 41 | hist_days=180, 42 | **kwargs): 43 | """ 44 | This is a helper function designed to be the primary call in an algo 45 | for calculating portfolio-level risk. It takes the current context and 46 | data objects and a `risk_func` (e.g., `value_at_risk`), and formats 47 | portfolio weights and returns such that the indicies line up in a numpy 48 | array. 49 | """ 50 | 51 | 52 | positions = context.portfolio.positions 53 | positions_index = pd.Index(positions) 54 | share_counts = pd.Series( 55 | index=positions_index, 56 | data=[positions[asset].amount for asset in positions] 57 | ) 58 | 59 | current_prices = data.current(positions_index, 'price') 60 | current_weights = ( 61 | share_counts * current_prices / context.portfolio.portfolio_value 62 | ) 63 | 64 | prices = data.history( 65 | current_weights.index.tolist(), 66 | 'price', 67 | hist_days, 68 | '1d' 69 | ) 70 | 71 | daily_rets = prices.pct_change() 72 | daily_rets = daily_rets - daily_rets.mean(skipna=True) 73 | daily_rets = daily_rets.fillna(0.0) 74 | 75 | risk = risk_func(current_weights.values, daily_rets, **kwargs) 76 | return risk 77 | -------------------------------------------------------------------------------- /tests/test_lightgbm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import lightgbm as lgb 5 | from sklearn.datasets import load_breast_cancer, dump_svmlight_file, load_svmlight_file 6 | import numpy as np 7 | from sklearn.model_selection import train_test_split 8 | 9 | 10 | def test_lgb(): 11 | X_train, X_test, y_train, y_test = train_test_split( 12 | *load_breast_cancer(True), test_size=0.1, random_state=2 13 | ) 14 | train_data = lgb.Dataset(X_train, label=y_train) 15 | valid_data = train_data.create_valid(X_test, label=y_test) 16 | 17 | params = { 18 | "objective": "binary", 19 | "metric": "auc", 20 | "min_data": 10, 21 | "num_leaves": 15, 22 | "verbose": -1, 23 | "num_threads": 1, 24 | "max_bin": 255 25 | } 26 | bst = lgb.Booster(params, train_data) 27 | bst.add_valid(valid_data, "valid_1") 28 | 29 | for i in range(30): 30 | bst.update() 31 | if i % 10 == 0: 32 | print(bst.eval_train(), bst.eval_valid()) 33 | 34 | assert bst.current_iteration() == 30 35 | assert bst.num_trees() == 30 36 | assert bst.num_model_per_iteration() == 1 37 | 38 | bst.save_model("model.txt") 39 | pred_from_matr = bst.predict(X_test) 40 | with tempfile.NamedTemporaryFile() as f: 41 | tname = f.name 42 | with open(tname, "w+b") as f: 43 | dump_svmlight_file(X_test, y_test, f) 44 | pred_from_file = bst.predict(tname) 45 | os.remove(tname) 46 | assert len(pred_from_matr) == len(pred_from_file) 47 | for preds in zip(pred_from_matr, pred_from_file): 48 | assert np.allclose(*preds) 49 | 50 | # check saved model persistence 51 | bst = lgb.Booster(params, model_file="model.txt") 52 | pred_from_model_file = bst.predict(X_test) 53 | assert len(pred_from_matr) == len(pred_from_model_file) 54 | for preds in zip(pred_from_matr, pred_from_model_file): 55 | # we need to check the consistency of model file here, so test for exact equal 56 | np.equal(*preds) 57 | 58 | # check early stopping is working. 
Make it stop very early, so the scores should be very close to zero 59 | pred_parameter = { 60 | "pred_early_stop": True, 61 | "pred_early_stop_freq": 5, 62 | "pred_early_stop_margin": 1.5 63 | } 64 | pred_early_stopping = bst.predict(X_test, **pred_parameter) 65 | assert len(pred_from_matr) == len(pred_early_stopping) 66 | for preds in zip(pred_early_stopping, pred_from_matr): 67 | # scores likely to be different, but prediction should still be the same 68 | assert (preds[0] > 0) == (preds[1] > 0) 69 | -------------------------------------------------------------------------------- /tests/expressions.py: -------------------------------------------------------------------------------- 1 | from alphatools.research import run_pipeline, make_factor_plot 2 | from alphatools.expression import ExpressionAlpha 3 | from alphatools.ics import Sector, SubIndustry 4 | from zipline.pipeline.factors import AverageDollarVolume, CustomFactor, Returns 5 | from zipline.pipeline.data import USEquityPricing as USEP 6 | from zipline.pipeline import Pipeline 7 | 8 | import pandas as pd 9 | import numpy as np 10 | 11 | universe = AverageDollarVolume(window_length=120).top(10) 12 | 13 | expressions = { 14 | '0': 'close', 15 | '1': 'delay(close,1)', 16 | '2': 'delta(close,5)', 17 | '3': 'returns', 18 | '4': 'delta(close,1)/delay(close,1)', 19 | '5': 'delta(close,5)/delay(close,5)', 20 | '6': 'rank(close)', 21 | '7': 'indneutralize(close, IndClass.sector)', 22 | '8': 'indneutralize(close, IndClass.industry)', 23 | } 24 | 25 | class Control_1(CustomFactor): 26 | window_length=2 27 | inputs=[USEP.close] 28 | 29 | def compute(self, today, assets, out, close): 30 | out[:]=close[-2] 31 | 32 | class Control_2(CustomFactor): 33 | window_length=6 34 | inputs=[USEP.close] 35 | 36 | def compute(self, today, assets, out, close): 37 | out[:]=close[-1] - close[-6] 38 | 39 | control = {} 40 | control_0 = USEP.close.latest 41 | control_1 = Control_1() 42 | control_2 = Control_2() 43 | control_3 = Returns(window_length=2) 44 | control_4 = Returns(window_length=2) 45 | control_5 = Returns(window_length=6) 46 | control_6 = USEP.close.latest.rank(mask=universe) 47 | control_7 = USEP.close.latest.demean(groupby=Sector(), mask=universe) 48 | control_8 = USEP.close.latest.demean(groupby=SubIndustry(), mask=universe) 49 | 50 | control = { 51 | '0': control_0, 52 | '1': control_1, 53 | '2': control_2, 54 | '3': control_3, 55 | '4': control_4, 56 | '5': control_5, 57 | '6': control_6, 58 | '7': control_7, 59 | '8': control_8, 60 | } 61 | 62 | 63 | start_date = '2017-01-04' 64 | end_date = '2017-01-04' 65 | 66 | 67 | def test_factor(expression, control, start_date='2017-01-04', end_date='2017-01-04', show_df=False): 68 | p = Pipeline(screen=universe) 69 | p.add(expression.make_pipeline_factor().pipeline_factor(mask=universe), 'expression_alpha') 70 | p.add(control, 'pipeline_factor') 71 | df = run_pipeline(p, start_date, end_date) 72 | print(np.allclose(df['expression_alpha'].values, df['pipeline_factor'].values)) 73 | if show_df: 74 | print(df) 75 | 76 | 77 | start_fac = 0 78 | end_fac = 8 79 | 80 | for i in range(start_fac, end_fac+1): 81 | test_factor(ExpressionAlpha(expressions[str(i)]), control[str(i)], show_df=False) 82 | -------------------------------------------------------------------------------- /alphatools/expression/expression.lark: -------------------------------------------------------------------------------- 1 | // Lark grammar for "expression"-based alphas 2 | 3 | ?value: abs 4 | | log 5 | | sign 6 | | greaterthan 7 | |
lessthan 8 | | equals 9 | | logicalor 10 | | ternary 11 | | scale 12 | | signedpower 13 | | decay_linear 14 | | ts_max 15 | | ts_min 16 | | ts_argmax 17 | | ts_argmin 18 | | ts_rank 19 | | max 20 | | min 21 | | stddev 22 | | close 23 | | opens 24 | | high 25 | | low 26 | | volume 27 | | returns 28 | | adv 29 | | vwap 30 | | delay 31 | | correlation 32 | | covariance 33 | | indneutralize 34 | | delta 35 | | div 36 | | mult 37 | | minus 38 | | plus 39 | | powerof 40 | | rank 41 | | sum 42 | | product 43 | | ESCAPED_STRING 44 | | "(" value ")" 45 | | neg 46 | | number 47 | | cap 48 | | SIGNED_NUMBER -> number 49 | | factory 50 | 51 | 52 | delay: "delay" "(" value "," SIGNED_NUMBER ")" 53 | delta: "delta" "(" value "," SIGNED_NUMBER ")" 54 | 55 | correlation: "correlation" "(" value "," value "," SIGNED_NUMBER ")" 56 | covariance: "covariance" "(" value "," value "," SIGNED_NUMBER ")" 57 | 58 | factory: "factory" "(" ESCAPED_STRING ["," ESCAPED_STRING ] ")" 59 | 60 | close: "close" 61 | opens: "opens" 62 | high: "high" 63 | low: "low" 64 | volume: "volume" 65 | returns: "returns" 66 | vwap: "vwap" 67 | adv: "adv" SIGNED_NUMBER 68 | cap: "cap" 69 | 70 | number: SIGNED_NUMBER 71 | 72 | ts_max: "ts_max" "(" value "," SIGNED_NUMBER ")" 73 | ts_min: "ts_min" "(" value "," SIGNED_NUMBER ")" 74 | ts_argmax: "ts_argmax" "(" value "," SIGNED_NUMBER ")" 75 | ts_argmin: "ts_argmin" "(" value "," SIGNED_NUMBER ")" 76 | ts_rank: "ts_rank" "(" value "," SIGNED_NUMBER ")" 77 | stddev: "stddev" "(" value "," SIGNED_NUMBER ")" 78 | indneutralize: "indneutralize" "(" value ["," INDCLASS] ")" 79 | 80 | // unlike the paper, these are element-by-element max/min of two value arrays 81 | max: "max" "(" value "," value ")" 82 | min: "min" "(" value "," value ")" 83 | 84 | div: value "/" value 85 | minus: value "-" value 86 | plus: value "+" value 87 | mult: value "*" value 88 | powerof: value "^" value 89 | 90 | abs: "abs" "(" value ")" 91 | log: "log" "(" value ")" 92 | sign: "sign" "(" value ")" 93 | greaterthan: value ">" value 94 | lessthan: value "<" value 95 | equals: value "==" value 96 | logicalor: value "||" value 97 | ternary: value "?" value ":" value 98 | scale: "scale" "(" value ")" 99 | signedpower: "signedpower" "(" value "," value ")" 100 | decay_linear: "decay_linear" "(" value "," SIGNED_NUMBER ")" 101 | 102 | neg: "-" value 103 | rank: "rank" "(" value ")" 104 | sum: "sum" "(" value "," SIGNED_NUMBER ")" 105 | product: "product" "(" value "," SIGNED_NUMBER ")" 106 | 107 | INDCLASS: ("IndClass.industry" | "IndClass.subindustry" | "IndClass.sector") 108 | 109 | %import common.SIGNED_NUMBER 110 | %import common.ESCAPED_STRING 111 | %import common.WS 112 | %ignore WS -------------------------------------------------------------------------------- /alphatools/misc/pair_trade.py: -------------------------------------------------------------------------------- 1 | # This is a simple pair trade example using 2 | # two related large-cap equities. 3 | 4 | 5 | 6 | import numpy as np 7 | from sklearn.linear_model import LinearRegression 8 | from zipline.api import ( 9 | schedule_function, 10 | date_rules, 11 | time_rules, 12 | order_target_percent, 13 | record, 14 | symbol 15 | ) 16 | 17 | 18 | LOOKBACK_DAYS = 120 19 | ENTRY_THRESHOLD = 2.0 20 | EXIT_THRESHOLD = 0.50 21 | 22 | def initialize(context): 23 | # This function runs once when sim starts. 24 | # Put your `schedule_function()`, `set_slippage`, and `set_commission` calls here.
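# Strategy sketch: rebal() regresses asset B's price on asset A's over LOOKBACK_DAYS, z-scores the latest residual, enters the spread when |z| crosses ENTRY_THRESHOLD, and unwinds when |z| falls back inside EXIT_THRESHOLD.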
25 | 26 | # Trade Apple vs Microsoft as the pair. 27 | # (Swap in any two related assets here.) 28 | context.asset_A = symbol('AAPL') 29 | context.asset_B = symbol('MSFT') 30 | 31 | # Keep state if we have crossed the ENTRY_THRESHOLD 32 | context.position_initiated = False 33 | 34 | # Run rebal function every day 15min before 35 | # close. 36 | schedule_function( 37 | func=rebal, 38 | date_rule=date_rules.every_day(), 39 | time_rule=time_rules.market_close( 40 | minutes=15) 41 | ) 42 | 43 | def before_trading_start(context, data): 44 | # This function runs before each daily session 45 | pass 46 | 47 | def rebal(context, data): 48 | # Basic pair trade; price-based regression 49 | 50 | # get historical data for both assets 51 | hist = data.history( 52 | [context.asset_A, context.asset_B], 53 | fields='price', 54 | bar_count=LOOKBACK_DAYS, 55 | frequency='1d' 56 | ) 57 | 58 | # the reshape is a scikit-learn nuance when you have 1-dim data 59 | asset_A_prices = hist[context.asset_A].values.reshape(-1,1) 60 | asset_B_prices = hist[context.asset_B].values.reshape(-1,1) 61 | 62 | # run a price regression 63 | lm = LinearRegression().fit( 64 | asset_A_prices, # X 65 | asset_B_prices # y 66 | ) 67 | 68 | # get residuals 69 | residuals = asset_B_prices - lm.predict(asset_A_prices) 70 | 71 | # the most recent residual is the current spread 72 | current_spread = residuals[-1] 73 | 74 | # Z-Score of current spread 75 | score = (current_spread/np.nanstd(residuals))[-1] 76 | 77 | target_weights = {} 78 | 79 | # trading logic 80 | if score > ENTRY_THRESHOLD and not context.position_initiated: 81 | target_weights[context.asset_A] = -5.0 # i.e., -500% 82 | target_weights[context.asset_B] = 5.0 83 | context.position_initiated = True 84 | elif score < -ENTRY_THRESHOLD and not context.position_initiated: 85 | target_weights[context.asset_A] = 5.0 86 | target_weights[context.asset_B] = -5.0 87 | context.position_initiated = True 88 | elif np.abs(score) < EXIT_THRESHOLD and context.position_initiated: 89 | # unwind 90 | for asset, position in context.portfolio.positions.items(): 91 | target_weights[asset] = 0 92 | context.position_initiated = False 93 | 94 | for asset, target in target_weights.items(): 95 | order_target_percent(asset, target) 96 | 97 | record(A=asset_A_prices[-1][0]) 98 | record(B=asset_B_prices[-1][0]) 99 | record(score=score) 100 | 101 | -------------------------------------------------------------------------------- /alphatools/ics/ics_scheme.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from numpy import where 4 | 5 | from zipline.data import bundles 6 | from zipline.pipeline.classifiers import Classifier 7 | from zipline.utils.numpy_utils import int64_dtype 8 | 9 | from os import path 10 | 11 | 12 | class SICMajorIndustry(Classifier): 13 | 14 | dtype = int64_dtype 15 | window_length = 0 16 | window_safe = True 17 | inputs = () 18 | missing_value = -1 19 | 20 | def __init__(self): 21 | self.data = np.load(path.join(path.dirname(__file__), 'sic_major.npy')) 22 | self.names = None 23 | def _compute(self, arrays, dates, assets, mask): 24 | return where( 25 | mask, 26 | self.data[assets], 27 | self.missing_value, 28 | ) 29 | 30 | class Sector(Classifier): 31 | 32 | dtype = int64_dtype 33 | window_length = 0 34 | window_safe = True 35 | inputs = () 36 | missing_value = -1 37 | 38 | def __init__(self): 39 | self.data = np.load( 40 | path.join(path.dirname(__file__), 'sectors.npy') 41 | ) 42 |
self.names = pd.read_csv( 43 | path.join(path.dirname(__file__), 'sector_names.csv'), 44 | header=None, 45 | index_col=0, 46 | names=['Sector_Name'] 47 | ) 48 | 49 | def _compute(self, arrays, dates, assets, mask): 50 | return where( 51 | mask, 52 | self.data[assets], 53 | self.missing_value, 54 | ) 55 | 56 | 57 | class SubIndustry(Classifier): 58 | 59 | dtype = int64_dtype 60 | window_length = 0 61 | window_safe = True 62 | inputs = () 63 | missing_value = -1 64 | 65 | def __init__(self): 66 | self.data = np.load( 67 | path.join(path.dirname(__file__), 'industries.npy') 68 | ) 69 | self.names = pd.read_csv( 70 | path.join(path.dirname(__file__), 'industry_names.csv'), 71 | header=None, 72 | index_col=0, 73 | names=['Industry_Name'] 74 | ) 75 | 76 | def _compute(self, arrays, dates, assets, mask): 77 | return where( 78 | mask, 79 | self.data[assets], 80 | self.missing_value, 81 | ) 82 | 83 | 84 | def make_sector_classifier( 85 | bundle='quandl', 86 | infile='../data/profiles_20170918.csv'): 87 | """ 88 | For a given bundle, create the .npy Sector and Industry classifier 89 | files. 90 | """ 91 | bundle_data = bundles.load(bundle) 92 | 93 | df_p = pd.read_csv(path.join(path.dirname(__file__), infile)) 94 | 95 | labels_sector, uniques_sector = pd.factorize(df_p['sector']) 96 | labels_industry, uniques_industry = pd.factorize(df_p['industry']) 97 | 98 | tickers = bundle_data.asset_finder.lookup_symbols( 99 | df_p['quandl_sym'], as_of_date=None) 100 | 101 | sids = [asset.sid for asset in tickers] 102 | max_sid = np.max(bundle_data.asset_finder.sids) 103 | 104 | sectors = np.full(np.max(max_sid)+1, -1, np.dtype('int64')) 105 | industries = np.full(np.max(max_sid)+1, -1, np.dtype('int64')) 106 | 107 | sectors[sids] = labels_sector 108 | industries[sids] = labels_industry 109 | 110 | np.save(path.join( 111 | path.dirname(__file__), '../data/sectors' 112 | ), sectors) 113 | np.save(path.join( 114 | path.dirname(__file__), '../data/industries' 115 | ), industries) 116 | 117 | pd.DataFrame(data=uniques_sector.tolist()).to_csv( 118 | path.join(path.dirname(__file__), '../data/sector_names.csv'), 119 | header=False 120 | ) 121 | pd.DataFrame(data=uniques_industry.tolist()).to_csv( 122 | path.join(path.dirname(__file__), '../data/industry_names.csv'), 123 | header=False 124 | ) 125 | 126 | return True 127 | 128 | def make_SIC_classifier( 129 | bundle='quandl', 130 | infile='../data/profiles_20170918.csv'): 131 | 132 | bundle_data = bundles.load(bundle) 133 | 134 | df_p = pd.read_csv(infile) 135 | df_cik = pd.read_csv('../data/cik_ticker_09152017.csv', sep='|') 136 | df_cik['SIC'] = df_cik['SIC'].fillna(-1).astype(np.int64).astype(str) 137 | df_cik['SIC_MajorIndustry'] = df_cik['SIC'].str[:2] 138 | df_cik['SIC_SubClassification'] = df_cik['SIC'].str[:3] 139 | df_cik['SIC_Specialization'] = df_cik['SIC'].str[:4] 140 | 141 | df_cik_select = df_cik.loc[df_cik.Ticker.isin(df_p['quandl_sym'])] 142 | tickers = bundle_data.asset_finder.lookup_symbols(df_cik_select['Ticker'], as_of_date=None) 143 | sids = [asset.sid for asset in tickers] 144 | max_sid = np.max(bundle_data.asset_finder.sids) 145 | 146 | major = np.full(max_sid+1, -1, np.dtype('int64')) 147 | subclass = np.full(max_sid+1, -1, np.dtype('int64')) 148 | specialize = np.full(max_sid+1, -1, np.dtype('int64')) 149 | 150 | major[sids] = df_cik_select['SIC_MajorIndustry'].astype(np.int64) 151 | subclass[sids] = df_cik_select['SIC_SubClassification'].astype(np.int64) 152 | specialize[sids] = df_cik_select['SIC_Specialization'].astype(np.int64) 153 | 154 | 
np.save('sic_major', major) 155 | np.save('sic_subclass', subclass) 156 | np.save('sic_specialize', specialize) 157 | 158 | 159 | 160 | if __name__ == '__main__': 161 | pass 162 | # make_sector_classifier() 163 | # make_SIC_classifier() 164 | -------------------------------------------------------------------------------- /notebooks/pipeline-blaze-factory.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | ":307: UserWarning: Overwriting bundle with name 'futures'\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from alphatools.research import run_pipeline, get_symbols\n", 18 | "from alphatools.data import Factory\n", 19 | "import pandas as pd\n", 20 | "from zipline.pipeline import Pipeline\n", 21 | "from zipline.pipeline.data import USEquityPricing\n", 22 | "from zipline.pipeline.filters import StaticAssets\n", 23 | "from zipline.pipeline.factors import CustomFactor" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# Example of use in a Custom Factor\n", 33 | "\n", 34 | "class MyFactor(CustomFactor):\n", 35 | " inputs = [Factory['sample'].value]\n", 36 | " window_length = 10\n", 37 | " \n", 38 | " def compute(self, today, assets, out, factory):\n", 39 | " out[:] = factory[-1]\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "assets = get_symbols(['A', 'AAL'], as_of_date=None)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "[Equity(0 [A]), Equity(2 [AAL])]" 60 | ] 61 | }, 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "assets" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "p = Pipeline(\n", 78 | " columns={\n", 79 | " 'col_A': Factory['sample'].value.latest,\n", 80 | " 'col_B': MyFactor()\n", 81 | " },\n", 82 | " screen=StaticAssets(assets)\n", 83 | ")" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 6, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "df = run_pipeline(\n", 93 | " p,\n", 94 | " pd.Timestamp('2016-01-05', tz='utc'),\n", 95 | " pd.Timestamp('2018-01-04', tz='utc')\n", 96 | ")" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 7, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/html": [ 107 | "
\n", 108 | "\n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | "
col_Acol_B
2016-01-05 00:00:00+00:00Equity(0 [A])0.5434050.543405
Equity(2 [AAL])0.4245180.424518
2016-01-06 00:00:00+00:00Equity(0 [A])0.5434050.543405
Equity(2 [AAL])0.4245180.424518
2016-01-07 00:00:00+00:00Equity(0 [A])0.5434050.543405
\n", 148 | "
" 149 | ], 150 | "text/plain": [ 151 | " col_A col_B\n", 152 | "2016-01-05 00:00:00+00:00 Equity(0 [A]) 0.543405 0.543405\n", 153 | " Equity(2 [AAL]) 0.424518 0.424518\n", 154 | "2016-01-06 00:00:00+00:00 Equity(0 [A]) 0.543405 0.543405\n", 155 | " Equity(2 [AAL]) 0.424518 0.424518\n", 156 | "2016-01-07 00:00:00+00:00 Equity(0 [A]) 0.543405 0.543405" 157 | ] 158 | }, 159 | "execution_count": 7, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "df.head()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3.5 (env_alphatools_stable)", 179 | "language": "python", 180 | "name": "env_alphatools_stable" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.5.5" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /alphatools/data/industry_names.csv: -------------------------------------------------------------------------------- 1 | 0,Medical Laboratories & Research 2 | 1,Aluminum 3 | 2,Major Airlines 4 | 3,Asset Management 5 | 4,Rental & Leasing Services 6 | 5,Semiconductor - Integrated Circuits 7 | 6,General Building Materials 8 | 7,Auto Parts Stores 9 | 8,Electronic Equipment 10 | 9,REIT - Retail 11 | 10,"Air Services, Other" 12 | 11,Drug Manufacturers - Major 13 | 12,Drugs Wholesale 14 | 13,Regional - Mid-Atlantic Banks 15 | 14,Business Services 16 | 15,Auto Dealerships 17 | 16,Medical Appliances & Equipment 18 | 17,Biotechnology 19 | 18,REIT - Residential 20 | 19,Office Supplies 21 | 20,Chemicals - Major Diversified 22 | 21,Property & Casualty Insurance 23 | 22,Specialized Health Services 24 | 23,Technical & System Software 25 | 24,Semiconductor Equipment & Materials 26 | 25,Technical Services 27 | 26,Information Technology Services 28 | 27,REIT - Diversified 29 | 28,Application Software 30 | 29,Property Management 31 | 30,Farm Products 32 | 31,Drugs - Generic 33 | 32,Business Software & Services 34 | 33,Communication Equipment 35 | 34,Home Health Care 36 | 35,Oil & Gas Refining & Marketing 37 | 36,Electric Utilities 38 | 37,Heavy Construction 39 | 38,Diversified Electronics 40 | 39,Life Insurance 41 | 40,Apparel Stores 42 | 41,Health Care Plans 43 | 42,Savings & Loans 44 | 43,Accident & Health Insurance 45 | 44,Farm & Construction Machinery 46 | 45,Credit Services 47 | 46,Surety & Title Insurance 48 | 47,Publishing - Newspapers 49 | 48,REIT - Hotel/Motel 50 | 49,Mortgage Investment 51 | 50,Industrial Electrical Equipment 52 | 51,Textile Industrial 53 | 52,Aerospace/Defense Products & Services 54 | 53,Industrial Equipment Wholesale 55 | 54,Insurance Brokers 56 | 55,Internet Information Providers 57 | 56,Steel & Iron 58 | 57,Specialty Chemicals 59 | 58,Diversified Utilities 60 | 59,Real Estate Development 61 | 60,Regional Airlines 62 | 61,Drug Delivery 63 | 62,Security & Protection Services 64 | 63,Auto Parts 65 | 64,Diagnostic Substances 66 | 65,"Movie Production, Theaters" 67 | 66,Entertainment - Diversified 68 | 67,Semiconductor - Broad Line 69 | 68,Diversified Machinery 70 | 69,Investment Brokerage - National 71 
| 70,Home Furnishings & Fixtures 72 | 71,Catalog & Mail Order Houses 73 | 72,Internet Software & Services 74 | 73,Medical Instruments & Supplies 75 | 74,"Tobacco Products, Other" 76 | 75,Semiconductor - Specialized 77 | 76,Independent Oil & Gas 78 | 77,Education & Training Services 79 | 78,Trucking 80 | 79,Metal Fabrication 81 | 80,REIT - Office 82 | 81,Railroads 83 | 82,Regional - Northeast Banks 84 | 83,Water Utilities 85 | 84,Electronics Wholesale 86 | 85,Shipping 87 | 86,Staffing & Outsourcing Services 88 | 87,Healthcare Information Services 89 | 88,Telecom Services - Domestic 90 | 89,Gas Utilities 91 | 90,Packaging & Containers 92 | 91,Air Delivery & Freight Services 93 | 92,Multimedia & Graphics Software 94 | 93,Oil & Gas Drilling & Exploration 95 | 94,Agricultural Chemicals 96 | 95,Residential Construction 97 | 96,Business Equipment 98 | 97,Industrial Equipment & Components 99 | 98,Money Center Banks 100 | 99,Management Services 101 | 100,Regional - Pacific Banks 102 | 101,Regional - Southwest Banks 103 | 102,Oil & Gas Equipment & Services 104 | 103,Broadcasting - Radio 105 | 104,Networking & Communication Devices 106 | 105,Restaurants 107 | 106,Regional - Southeast Banks 108 | 107,Toy & Hobby Stores 109 | 108,Electronics Stores 110 | 109,"Recreational Goods, Other" 111 | 110,"Lumber, Wood Production" 112 | 111,Personal Services 113 | 112,Beverages - Wineries & Distillers 114 | 113,Sporting Goods Stores 115 | 114,Processed & Packaged Goods 116 | 115,Printed Circuit Boards 117 | 116,"Specialty Retail, Other" 118 | 117,"Discount, Variety Stores" 119 | 118,Long-Term Care Facilities 120 | 119,Foreign Money Center Banks 121 | 120,Scientific & Technical Instruments 122 | 121,Department Stores 123 | 122,Major Integrated Oil & Gas 124 | 123,Foreign Regional Banks 125 | 124,Information & Delivery Services 126 | 125,Security Software & Services 127 | 126,Data Storage Devices 128 | 127,Beverages - Brewers 129 | 128,Industrial Metals & Minerals 130 | 129,Regional - Midwest Banks 131 | 130,Building Materials Wholesale 132 | 131,Resorts & Casinos 133 | 132,Food - Major Diversified 134 | 133,Wireless Communications 135 | 134,Grocery Stores 136 | 135,General Contractors 137 | 136,Diversified Investments 138 | 137,Pollution & Treatment Controls 139 | 138,Beverages - Soft Drinks 140 | 139,Diversified Communication Services 141 | 140,Marketing Services 142 | 141,Silver 143 | 142,Broadcasting - TV 144 | 143,Cleaning Products 145 | 144,Gaming Activities 146 | 145,Food Wholesale 147 | 146,Lodging 148 | 147,CATV Systems 149 | 148,Personal Products 150 | 149,Processing Systems & Products 151 | 150,Waste Management 152 | 151,Sporting Activities 153 | 152,Paper & Paper Products 154 | 153,Housewares & Accessories 155 | 154,Textile - Apparel Footwear & Accessories 156 | 155,Textile - Apparel Clothing 157 | 156,Diversified Computer Systems 158 | 157,Rubber & Plastics 159 | 158,REIT - Industrial 160 | 159,Auto Parts Wholesale 161 | 160,Hospitals 162 | 161,Computer Based Systems 163 | 162,Computer Peripherals 164 | 163,Drug Manufacturers - Other 165 | 164,Dairy Products 166 | 165,REIT - Healthcare Facilities 167 | 166,Sporting Goods 168 | 167,Cement 169 | 168,Auto Manufacturers - Major 170 | 169,Copper 171 | 170,Research Services 172 | 171,Specialty Eateries 173 | 172,Investment Brokerage - Regional 174 | 173,Drug Stores 175 | 174,Long Distance Carriers 176 | 175,Gold 177 | 176,Toys & Games 178 | 177,Home Improvement Stores 179 | 178,Machine Tools & Accessories 180 | 179,Nonmetallic Mineral Mining 181 | 
180,Recreational Vehicles 182 | 181,Conglomerates 183 | 182,Medical Equipment Wholesale 184 | 183,Confectioners 185 | 184,Home Furnishing Stores 186 | 185,Trucks & Other Vehicles 187 | 186,Consumer Services 188 | 187,Advertising Agencies 189 | 188,Appliances 190 | 189,Publishing - Books 191 | 190,Oil & Gas Pipelines 192 | 191,Small Tools & Accessories 193 | 192,Internet Service Providers 194 | 193,Publishing - Periodicals 195 | 194,Cigarettes 196 | 195,Semiconductor- Memory Chips 197 | 196,Aerospace/Defense - Major Diversified 198 | 197,Drug Related Products 199 | 198,Jewelry Stores 200 | 199,General Entertainment 201 | 200,Computers Wholesale 202 | 201,Closed-End Fund - Debt 203 | 202,Meat Products 204 | 203,Music & Video Stores 205 | -------------------------------------------------------------------------------- /alphatools/ics/industry_names.csv: -------------------------------------------------------------------------------- 1 | 0,Medical Laboratories & Research 2 | 1,Aluminum 3 | 2,Major Airlines 4 | 3,Asset Management 5 | 4,Rental & Leasing Services 6 | 5,Semiconductor - Integrated Circuits 7 | 6,General Building Materials 8 | 7,Auto Parts Stores 9 | 8,Electronic Equipment 10 | 9,REIT - Retail 11 | 10,"Air Services, Other" 12 | 11,Drug Manufacturers - Major 13 | 12,Drugs Wholesale 14 | 13,Regional - Mid-Atlantic Banks 15 | 14,Business Services 16 | 15,Auto Dealerships 17 | 16,Medical Appliances & Equipment 18 | 17,Biotechnology 19 | 18,REIT - Residential 20 | 19,Office Supplies 21 | 20,Chemicals - Major Diversified 22 | 21,Property & Casualty Insurance 23 | 22,Specialized Health Services 24 | 23,Technical & System Software 25 | 24,Semiconductor Equipment & Materials 26 | 25,Technical Services 27 | 26,Information Technology Services 28 | 27,REIT - Diversified 29 | 28,Application Software 30 | 29,Property Management 31 | 30,Farm Products 32 | 31,Drugs - Generic 33 | 32,Business Software & Services 34 | 33,Communication Equipment 35 | 34,Home Health Care 36 | 35,Oil & Gas Refining & Marketing 37 | 36,Electric Utilities 38 | 37,Heavy Construction 39 | 38,Diversified Electronics 40 | 39,Life Insurance 41 | 40,Apparel Stores 42 | 41,Health Care Plans 43 | 42,Savings & Loans 44 | 43,Accident & Health Insurance 45 | 44,Farm & Construction Machinery 46 | 45,Credit Services 47 | 46,Surety & Title Insurance 48 | 47,Publishing - Newspapers 49 | 48,REIT - Hotel/Motel 50 | 49,Mortgage Investment 51 | 50,Industrial Electrical Equipment 52 | 51,Textile Industrial 53 | 52,Aerospace/Defense Products & Services 54 | 53,Industrial Equipment Wholesale 55 | 54,Insurance Brokers 56 | 55,Internet Information Providers 57 | 56,Steel & Iron 58 | 57,Specialty Chemicals 59 | 58,Diversified Utilities 60 | 59,Real Estate Development 61 | 60,Regional Airlines 62 | 61,Drug Delivery 63 | 62,Security & Protection Services 64 | 63,Auto Parts 65 | 64,Diagnostic Substances 66 | 65,"Movie Production, Theaters" 67 | 66,Entertainment - Diversified 68 | 67,Semiconductor - Broad Line 69 | 68,Diversified Machinery 70 | 69,Investment Brokerage - National 71 | 70,Home Furnishings & Fixtures 72 | 71,Catalog & Mail Order Houses 73 | 72,Internet Software & Services 74 | 73,Medical Instruments & Supplies 75 | 74,"Tobacco Products, Other" 76 | 75,Semiconductor - Specialized 77 | 76,Independent Oil & Gas 78 | 77,Education & Training Services 79 | 78,Trucking 80 | 79,Metal Fabrication 81 | 80,REIT - Office 82 | 81,Railroads 83 | 82,Regional - Northeast Banks 84 | 83,Water Utilities 85 | 84,Electronics Wholesale 86 | 85,Shipping 87 | 
86,Staffing & Outsourcing Services 88 | 87,Healthcare Information Services 89 | 88,Telecom Services - Domestic 90 | 89,Gas Utilities 91 | 90,Packaging & Containers 92 | 91,Air Delivery & Freight Services 93 | 92,Multimedia & Graphics Software 94 | 93,Oil & Gas Drilling & Exploration 95 | 94,Agricultural Chemicals 96 | 95,Residential Construction 97 | 96,Business Equipment 98 | 97,Industrial Equipment & Components 99 | 98,Money Center Banks 100 | 99,Management Services 101 | 100,Regional - Pacific Banks 102 | 101,Regional - Southwest Banks 103 | 102,Oil & Gas Equipment & Services 104 | 103,Broadcasting - Radio 105 | 104,Networking & Communication Devices 106 | 105,Restaurants 107 | 106,Regional - Southeast Banks 108 | 107,Toy & Hobby Stores 109 | 108,Electronics Stores 110 | 109,"Recreational Goods, Other" 111 | 110,"Lumber, Wood Production" 112 | 111,Personal Services 113 | 112,Beverages - Wineries & Distillers 114 | 113,Sporting Goods Stores 115 | 114,Processed & Packaged Goods 116 | 115,Printed Circuit Boards 117 | 116,"Specialty Retail, Other" 118 | 117,"Discount, Variety Stores" 119 | 118,Long-Term Care Facilities 120 | 119,Foreign Money Center Banks 121 | 120,Scientific & Technical Instruments 122 | 121,Department Stores 123 | 122,Major Integrated Oil & Gas 124 | 123,Foreign Regional Banks 125 | 124,Information & Delivery Services 126 | 125,Security Software & Services 127 | 126,Data Storage Devices 128 | 127,Beverages - Brewers 129 | 128,Industrial Metals & Minerals 130 | 129,Regional - Midwest Banks 131 | 130,Building Materials Wholesale 132 | 131,Resorts & Casinos 133 | 132,Food - Major Diversified 134 | 133,Wireless Communications 135 | 134,Grocery Stores 136 | 135,General Contractors 137 | 136,Diversified Investments 138 | 137,Pollution & Treatment Controls 139 | 138,Beverages - Soft Drinks 140 | 139,Diversified Communication Services 141 | 140,Marketing Services 142 | 141,Silver 143 | 142,Broadcasting - TV 144 | 143,Cleaning Products 145 | 144,Gaming Activities 146 | 145,Food Wholesale 147 | 146,Lodging 148 | 147,CATV Systems 149 | 148,Personal Products 150 | 149,Processing Systems & Products 151 | 150,Waste Management 152 | 151,Sporting Activities 153 | 152,Paper & Paper Products 154 | 153,Housewares & Accessories 155 | 154,Textile - Apparel Footwear & Accessories 156 | 155,Textile - Apparel Clothing 157 | 156,Diversified Computer Systems 158 | 157,Rubber & Plastics 159 | 158,REIT - Industrial 160 | 159,Auto Parts Wholesale 161 | 160,Hospitals 162 | 161,Computer Based Systems 163 | 162,Computer Peripherals 164 | 163,Drug Manufacturers - Other 165 | 164,Dairy Products 166 | 165,REIT - Healthcare Facilities 167 | 166,Sporting Goods 168 | 167,Cement 169 | 168,Auto Manufacturers - Major 170 | 169,Copper 171 | 170,Research Services 172 | 171,Specialty Eateries 173 | 172,Investment Brokerage - Regional 174 | 173,Drug Stores 175 | 174,Long Distance Carriers 176 | 175,Gold 177 | 176,Toys & Games 178 | 177,Home Improvement Stores 179 | 178,Machine Tools & Accessories 180 | 179,Nonmetallic Mineral Mining 181 | 180,Recreational Vehicles 182 | 181,Conglomerates 183 | 182,Medical Equipment Wholesale 184 | 183,Confectioners 185 | 184,Home Furnishing Stores 186 | 185,Trucks & Other Vehicles 187 | 186,Consumer Services 188 | 187,Advertising Agencies 189 | 188,Appliances 190 | 189,Publishing - Books 191 | 190,Oil & Gas Pipelines 192 | 191,Small Tools & Accessories 193 | 192,Internet Service Providers 194 | 193,Publishing - Periodicals 195 | 194,Cigarettes 196 | 195,Semiconductor- Memory Chips 
197 | 196,Aerospace/Defense - Major Diversified 198 | 197,Drug Related Products 199 | 198,Jewelry Stores 200 | 199,General Entertainment 201 | 200,Computers Wholesale 202 | 201,Closed-End Fund - Debt 203 | 202,Meat Products 204 | 203,Music & Video Stores 205 | -------------------------------------------------------------------------------- /alphatools/research/research.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import time 4 | from zipline.data import bundles 5 | from zipline.data.data_portal import DataPortal 6 | from zipline.pipeline.data import USEquityPricing 7 | from zipline.pipeline.engine import SimplePipelineEngine 8 | from zipline.pipeline.loaders import USEquityPricingLoader 9 | from zipline.utils.calendars import get_calendar 10 | from zipline.assets._assets import Equity 11 | from zipline.pipeline.loaders.blaze import BlazeLoader, from_blaze 12 | from zipline.utils.run_algo import load_extensions 13 | 14 | 15 | # Load extensions.py; this allows you access to custom bundles 16 | load_extensions( 17 | default=True, 18 | extensions=[], 19 | strict=True, 20 | environ=os.environ, 21 | ) 22 | 23 | 24 | # Set-Up Pricing Data Access 25 | trading_calendar = get_calendar('NYSE') 26 | bundle = 'quandl' 27 | bundle_data = bundles.load(bundle) 28 | 29 | 30 | loaders = {} 31 | 32 | # create an empty BlazeLoader 33 | blaze_loader = BlazeLoader() 34 | 35 | def my_dispatcher(column): 36 | return loaders[column] 37 | 38 | pipeline_loader = USEquityPricingLoader( 39 | bundle_data.equity_daily_bar_reader, 40 | bundle_data.adjustment_reader, 41 | ) 42 | 43 | def choose_loader(column): 44 | if column in USEquityPricing.columns: 45 | return pipeline_loader 46 | try: 47 | return my_dispatcher(column) 48 | except KeyError: 49 | pass 50 | return blaze_loader 51 | 52 | # Set-Up Pipeline Engine 53 | engine = SimplePipelineEngine( 54 | get_loader=choose_loader, 55 | calendar=trading_calendar.all_sessions, 56 | asset_finder=bundle_data.asset_finder, 57 | ) 58 | 59 | def run_pipeline(pipeline, start_date, end_date): 60 | return engine.run_pipeline( 61 | pipeline, 62 | pd.Timestamp(start_date, tz='utc'), 63 | pd.Timestamp(end_date, tz='utc') 64 | ) 65 | 66 | 67 | 68 | data = DataPortal( 69 | bundle_data.asset_finder, 70 | trading_calendar=trading_calendar, 71 | first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day, 72 | equity_minute_reader=None, 73 | equity_daily_reader=bundle_data.equity_daily_bar_reader, 74 | adjustment_reader=bundle_data.adjustment_reader, 75 | ) 76 | 77 | def set_bundle(name, calendar='NYSE'): 78 | global trading_calendar 79 | global bundle 80 | global bundle_data 81 | global engine 82 | global choose_loader 83 | global data 84 | 85 | bundle = name 86 | trading_calendar = get_calendar(calendar) 87 | bundle_data = bundles.load(bundle) 88 | engine = SimplePipelineEngine( 89 | get_loader=choose_loader, 90 | calendar=trading_calendar.all_sessions, 91 | asset_finder=bundle_data.asset_finder, 92 | ) 93 | 94 | data = DataPortal( 95 | bundle_data.asset_finder, 96 | trading_calendar=trading_calendar, 97 | first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day, 98 | equity_minute_reader=None, 99 | equity_daily_reader=bundle_data.equity_daily_bar_reader, 100 | adjustment_reader=bundle_data.adjustment_reader, 101 | ) 102 | 103 | 104 | 105 | 106 | def get_symbols(tickers, as_of_date=None): 107 | if (type(tickers)==str): 108 | return bundle_data.asset_finder.lookup_symbols( 109 | [tickers], 
as_of_date=as_of_date) 110 | else: 111 | if(type(tickers[0])==Equity): 112 | return tickers 113 | else: 114 | return bundle_data.asset_finder.lookup_symbols( 115 | tickers, as_of_date=as_of_date) 116 | 117 | def get_pricing(tickers, start_date, end_date, field='close'): 118 | 119 | end_dt = pd.Timestamp(end_date, tz='UTC', offset='C') 120 | start_dt = pd.Timestamp(start_date, tz='UTC', offset='C') 121 | 122 | symbols = get_symbols(tickers, as_of_date=end_dt) 123 | 124 | end_loc = trading_calendar.closes.index.get_loc(end_dt) 125 | start_loc = trading_calendar.closes.index.get_loc(start_dt) 126 | 127 | dat = data.get_history_window( 128 | assets=symbols, 129 | end_dt=end_dt, 130 | bar_count=end_loc - start_loc, 131 | frequency='1d', 132 | field=field, 133 | data_frequency='daily' 134 | ) 135 | 136 | return dat 137 | 138 | import alphalens as al 139 | 140 | def make_quantile_plot(df, start_date, end_date): 141 | assets = df.index.levels[1].values.tolist() 142 | df = df.dropna() 143 | pricing = get_pricing( 144 | assets, 145 | start_date, 146 | end_date, 147 | 'close' 148 | ) 149 | 150 | factor_names = df.columns 151 | factor_data = {} 152 | 153 | start_time = time.clock() 154 | for factor in factor_names: 155 | print("Formatting factor data for: " + factor) 156 | factor_data[factor] = al.utils.get_clean_factor_and_forward_returns( 157 | factor=df[factor], 158 | prices=pricing, 159 | periods=[1] 160 | ) 161 | end_time = time.clock() 162 | print("Time to get arrange factor data: %.2f secs" % (end_time - start_time)) 163 | 164 | qr_factor_returns = [] 165 | 166 | for i, factor in enumerate(factor_names): 167 | mean_ret, _ = al.performance.mean_return_by_quantile(factor_data[factor]) 168 | mean_ret.columns = [factor] 169 | qr_factor_returns.append(mean_ret) 170 | 171 | df_qr_factor_returns = pd.concat(qr_factor_returns, axis=1) 172 | 173 | (10000*df_qr_factor_returns).plot.bar( 174 | subplots=True, 175 | sharey=True, 176 | layout=(4,2), 177 | figsize=(14, 14), 178 | legend=False, 179 | title='Alphas Comparison: Basis Points Per Day per Quantile' 180 | ) 181 | 182 | return df_qr_factor_returns 183 | 184 | 185 | def make_factor_plot(df, start_date, end_date): 186 | assets = df.index.levels[1].values.tolist() 187 | df = df.dropna() 188 | pricing = get_pricing( 189 | assets, 190 | start_date, 191 | end_date, 192 | 'close' 193 | ) 194 | 195 | factor_names = df.columns 196 | factor_data = {} 197 | 198 | start_time = time.clock() 199 | for factor in factor_names: 200 | print("Formatting factor data for: " + factor) 201 | factor_data[factor] = al.utils.get_clean_factor_and_forward_returns( 202 | factor=df[factor], 203 | prices=pricing, 204 | periods=[1], 205 | quantiles=1 206 | ) 207 | end_time = time.clock() 208 | print("Time to get arrange factor data: %.2f secs" % (end_time - start_time)) 209 | 210 | ls_factor_returns = [] 211 | 212 | start_time = time.clock() 213 | for i, factor in enumerate(factor_names): 214 | ls = al.performance.factor_returns(factor_data[factor]) 215 | ls.columns = [factor] 216 | ls_factor_returns.append(ls) 217 | end_time = time.clock() 218 | print("Time to generate long/short returns: %.2f secs" % (end_time - start_time)) 219 | 220 | df_ls_factor_returns = pd.concat(ls_factor_returns, axis=1) 221 | (1+df_ls_factor_returns).cumprod().plot(title='Cumulative Factor Returns'); 222 | return df_ls_factor_returns 223 | 224 | -------------------------------------------------------------------------------- /notebooks/pipeline-minimal.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | " col_A col_B price\n", 13 | "2016-01-05 00:00:00+00:00 Equity(0 [A]) 1468.0 True 40.69\n", 14 | " Equity(2 [AAL]) 1469.0 False 40.91\n", 15 | "2016-01-06 00:00:00+00:00 Equity(0 [A]) 1470.0 True 40.55\n", 16 | " Equity(2 [AAL]) 1471.0 False 40.52\n", 17 | "2016-01-07 00:00:00+00:00 Equity(0 [A]) 1472.0 True 40.73\n", 18 | " Equity(2 [AAL]) 1473.0 False 41.23\n" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "from zipline.data import bundles\n", 24 | "from zipline.pipeline import Pipeline\n", 25 | "from zipline.pipeline.data import USEquityPricing\n", 26 | "from zipline.pipeline.data import Column \n", 27 | "from zipline.pipeline.data import DataSet\n", 28 | "from zipline.pipeline.engine import SimplePipelineEngine\n", 29 | "from zipline.pipeline.filters import StaticAssets\n", 30 | "from zipline.pipeline.loaders import USEquityPricingLoader\n", 31 | "from zipline.pipeline.loaders.frame import DataFrameLoader\n", 32 | "from zipline.utils.calendars import get_calendar\n", 33 | "\n", 34 | "import numpy as np\n", 35 | "import pandas as pd\n", 36 | "\n", 37 | "trading_calendar = get_calendar('NYSE')\n", 38 | "bundle_data = bundles.load('quandl')\n", 39 | "\n", 40 | "\n", 41 | "# Set up Custom Data Source for two sids for DataFrameLoader\n", 42 | "class MyDataSet(DataSet): \n", 43 | " column_A = Column(dtype=float)\n", 44 | " column_B = Column(dtype=bool) \n", 45 | "\n", 46 | "dates = pd.date_range('2014-01-01', '2017-01-01', tz='UTC')\n", 47 | "assets = bundle_data.asset_finder.lookup_symbols(['A', 'AAL'], as_of_date=None)\n", 48 | "sids = pd.Int64Index([asset.sid for asset in assets])\n", 49 | "\n", 50 | "# The values for Column A will just be a 2D array of numbers ranging from 1 -> N. \n", 51 | "column_A_frame = pd.DataFrame( \n", 52 | " data=np.arange(len(dates)*len(assets), dtype=float).reshape(len(dates), len(assets)), \n", 53 | " index=dates,\n", 54 | " columns=sids,\n", 55 | ")\n", 56 | "\n", 57 | "# Column B will always provide True for 0 and False for 1. 
\n", 58 | "column_B_frame = pd.DataFrame(data={sids[0]: True, sids[1]: False}, index=dates)\n", 59 | "\n", 60 | "loaders = { \n", 61 | " MyDataSet.column_A: DataFrameLoader(MyDataSet.column_A, column_A_frame), \n", 62 | " MyDataSet.column_B: DataFrameLoader(MyDataSet.column_B, column_B_frame), \n", 63 | "}\n", 64 | "\n", 65 | "def my_dispatcher(column):\n", 66 | " return loaders[column]\n", 67 | "\n", 68 | "\n", 69 | "# Set up pipeline engine\n", 70 | "\n", 71 | "# Loader for pricing\n", 72 | "pipeline_loader = USEquityPricingLoader(\n", 73 | " bundle_data.equity_daily_bar_reader,\n", 74 | " bundle_data.adjustment_reader,\n", 75 | ")\n", 76 | "\n", 77 | "def choose_loader(column):\n", 78 | " if column in USEquityPricing.columns:\n", 79 | " return pipeline_loader\n", 80 | " return my_dispatcher(column)\n", 81 | "\n", 82 | "engine = SimplePipelineEngine(\n", 83 | " get_loader=choose_loader,\n", 84 | " calendar=trading_calendar.all_sessions,\n", 85 | " asset_finder=bundle_data.asset_finder,\n", 86 | ")\n", 87 | "\n", 88 | "p = Pipeline(\n", 89 | " columns={\n", 90 | " 'price': USEquityPricing.close.latest,\n", 91 | " 'col_A': MyDataSet.column_A.latest,\n", 92 | " 'col_B': MyDataSet.column_B.latest\n", 93 | " },\n", 94 | " screen=StaticAssets(assets)\n", 95 | ")\n", 96 | "\n", 97 | "df = engine.run_pipeline(\n", 98 | " p,\n", 99 | " pd.Timestamp('2016-01-05', tz='utc'),\n", 100 | " pd.Timestamp('2016-01-07', tz='utc')\n", 101 | ")\n", 102 | "\n", 103 | "print(df)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 2, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "1.11.3\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "print(np.__version__)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 3, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "loader = my_dispatcher(MyDataSet.column_A)\n", 130 | "adj_array = loader.load_adjusted_array(\n", 131 | " [MyDataSet.column_A],\n", 132 | " dates,\n", 133 | " sids,\n", 134 | " np.ones((len(dates), len(sids)), dtype=bool)\n", 135 | ")\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 4, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "Adjusted Array (float64):\n", 148 | "\n", 149 | "Data:\n", 150 | "array([[ 0.00000000e+00, 1.00000000e+00],\n", 151 | " [ 2.00000000e+00, 3.00000000e+00],\n", 152 | " [ 4.00000000e+00, 5.00000000e+00],\n", 153 | " ..., \n", 154 | " [ 2.18800000e+03, 2.18900000e+03],\n", 155 | " [ 2.19000000e+03, 2.19100000e+03],\n", 156 | " [ 2.19200000e+03, 2.19300000e+03]])\n", 157 | "\n", 158 | "Adjustments:\n", 159 | "{}\n", 160 | "\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "print(list(adj_array.values())[0].inspect())" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 5, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "zipline.pipeline.data.dataset.BoundColumn" 177 | ] 178 | }, 179 | "execution_count": 5, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "type(MyDataSet.column_A)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 6, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "Int64Index([0, 2], dtype='int64')" 197 | ] 198 | }, 199 | "execution_count": 6, 200 | "metadata": 
{}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "column_A_frame.columns" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "Python 3.5 (env_alphatools)", 226 | "language": "python", 227 | "name": "env_alphatools" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.5.5" 240 | }, 241 | "varInspector": { 242 | "cols": { 243 | "lenName": 16, 244 | "lenType": 16, 245 | "lenVar": 40 246 | }, 247 | "kernels_config": { 248 | "python": { 249 | "delete_cmd_postfix": "", 250 | "delete_cmd_prefix": "del ", 251 | "library": "var_list.py", 252 | "varRefreshCmd": "print(var_dic_list())" 253 | }, 254 | "r": { 255 | "delete_cmd_postfix": ") ", 256 | "delete_cmd_prefix": "rm(", 257 | "library": "var_list.r", 258 | "varRefreshCmd": "cat(var_dic_list()) " 259 | } 260 | }, 261 | "types_to_exclude": [ 262 | "module", 263 | "function", 264 | "builtin_function_or_method", 265 | "instance", 266 | "_Feature" 267 | ], 268 | "window_display": false 269 | } 270 | }, 271 | "nbformat": 4, 272 | "nbformat_minor": 2 273 | } 274 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 Jonathan Larkin 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /notebooks/one_o_one_alphas.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": "(rank(ts_argmax(signedpower(((returns < 0) ? 
stddev(returns, 20) : close), 2.), 5)) - 0.5)", 3 | "2": "(-1 * correlation(rank(delta(log(volume), 2)), rank(((close - opens) / opens)), 6))", 4 | "3": "(-1 * correlation(rank(opens), rank(volume), 10))", 5 | "4": "(-1 * ts_rank(rank(low), 9))", 6 | "5": "(rank((opens - (sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))", 7 | "6": "(-1 * correlation(opens, volume, 10))", 8 | "7": "((adv20 < volume) ? ((-1 * ts_rank(abs(delta(close, 7)), 60)) * sign(delta(close, 7))) : (-1* 1))", 9 | "8": "-1*rank(((sum(opens, 5)*sum(returns, 5))-delay((sum(opens, 5)*sum(returns, 5)),10)))", 10 | "9": "((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ? delta(close, 1) : (-1 * delta(close, 1))))", 11 | "10": "rank(((0 < ts_min(delta(close, 1), 4)) ? delta(close, 1) : ((ts_max(delta(close, 1), 4) < 0) ? delta(close, 1) : (-1 * delta(close, 1)))))", 12 | "11": "((rank(ts_max((vwap - close), 3)) + rank(ts_min((vwap - close), 3))) * rank(delta(volume, 3)))", 13 | "12": "(sign(delta(volume, 1)) * (-1 * delta(close, 1)))", 14 | "13": "(-1 * rank(covariance(rank(close), rank(volume), 5)))", 15 | "14": "((-1 * rank(delta(returns, 3))) * correlation(opens, volume, 10))", 16 | "15": "(-1 * sum(rank(correlation(rank(high), rank(volume), 3)), 3))", 17 | "16": "(-1 * rank(covariance(rank(high), rank(volume), 5)))", 18 | "17": "(((-1 * rank(ts_rank(close, 10))) * rank(delta(delta(close, 1), 1))) *rank(ts_rank((volume / adv20), 5)))", 19 | "18": "(-1 * rank(((stddev(abs((close - opens)), 5) + (close - opens)) + correlation(close, opens,10))))", 20 | "19": "((-1 * sign(((close - delay(close, 7)) + delta(close, 7)))) * (1 + rank((1 + sum(returns, 250)))))", 21 | "20": "(((-1 * rank((opens - delay(high, 1)))) * rank((opens - delay(close, 1)))) * rank((opens - delay(low, 1))))", 22 | "21": "((((sum(close, 8) / 8) + stddev(close, 8)) < (sum(close, 2) / 2)) ? (-1 * 1) : (((sum(close, 2) / 2) < ((sum(close, 8) / 8) - stddev(close, 8))) ? 1 : (((1 < (volume / adv20)) || ((volume/adv20) == 1)) ? 1 : (-1 * 1))))", 23 | "22": "(-1 * (delta(correlation(high, volume, 5), 5) * rank(stddev(close, 20))))", 24 | "23": "(((sum(high, 20) / 20) < high) ? (-1 * delta(high, 2)) : 0)", 25 | "24": "((((delta((sum(close, 100) / 100), 100) / delay(close, 100)) < 0.05) || ((delta((sum(close, 100) / 100), 100) / delay(close, 100)) == 0.05)) ? (-1 * (close - ts_min(close, 100))) : (-1 * delta(close, 3)))", 26 | "25": "rank(((((-1 * returns) * adv20) * vwap) * (high - close)))", 27 | "26": "(-1 * ts_max(correlation(ts_rank(volume, 5), ts_rank(high, 5), 5), 3))", 28 | "27": "((0.5 < rank((sum(correlation(rank(volume), rank(vwap), 6), 2) / 2.0))) ? 
(-1 * 1) : 1)", 29 | "28": "scale(((correlation(adv20, low, 5) + ((high + low) / 2)) - close))", 30 | "29": "(min(product(rank(rank(scale(log(sum(ts_min(rank(rank((-1 * rank(delta((close - 1),5))))), 2), 1))))), 1), 5) + ts_rank(delay((-1 * returns), 6), 5))", 31 | "30": "(((1.0 - rank(((sign((close - delay(close, 1))) + sign((delay(close, 1) - delay(close, 2)))) + sign((delay(close, 2) - delay(close, 3)))))) * sum(volume, 5)) / sum(volume, 20))", 32 | "31": "((rank(rank(rank(decay_linear((-1 * rank(rank(delta(close, 10)))), 10)))) + rank((-1 * delta(close, 3)))) + sign(scale(correlation(adv20, low, 12))))", 33 | "32": "(scale(((sum(close, 7) / 7) - close)) + (20 * scale(correlation(vwap, delay(close, 5), 230))))", 34 | "33": "rank((-1 * ((1 - (opens / close))^1)))", 35 | "34": "rank(((1 - rank((stddev(returns, 2) / stddev(returns, 5)))) + (1 - rank(delta(close, 1)))))", 36 | "35": "((ts_rank(volume, 32) * (1 - ts_rank(((close + high) - low), 16))) * (1 - ts_rank(returns, 32))) ", 37 | "36": "(((((2.21 * rank(correlation((close - opens), delay(volume, 1), 15))) + (0.7 * rank((opens - close)))) + (0.73 * rank(ts_rank(delay((-1 * returns), 6), 5)))) + rank(abs(correlation(vwap, adv20, 6)))) + (0.6 * rank((((sum(close, 200) / 200) - opens) * (close - opens))))) ", 38 | "37": "(rank(correlation(delay((opens - close), 1), close, 200)) + rank((opens - close))) ", 39 | "38": "((-1 * rank(ts_rank(close, 10))) * rank((close / opens)))", 40 | "39": "((-1 * rank((delta(close, 7) * (1 - rank(decay_linear((volume / adv20), 9)))))) * (1 + rank(sum(returns, 250))))", 41 | "40": "((-1 * rank(stddev(high, 10))) * correlation(high, volume, 10))", 42 | "41": "(((high * low)^0.5) - vwap) ", 43 | "42": "(rank((vwap - close)) / rank((vwap + close)))", 44 | "43": "(ts_rank((volume / adv20), 20) * ts_rank((-1 * delta(close, 7)), 8)) ", 45 | "44": "(-1 * correlation(high, rank(volume), 5)) ", 46 | "45": "(-1 * ((rank((sum(delay(close, 5), 20) / 20)) * correlation(close, volume, 2)) *rank(correlation(sum(close, 5), sum(close, 20), 2)))) ", 47 | "46": "((0.25 < (((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10))) ? (-1 * 1) : (((((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)) < 0) ? 1 : ((-1 * 1) * (close - delay(close, 1))))) ", 48 | "47": "((((rank((1 / close)) * volume) / adv20) * ((high * rank((high - close))) / (sum(high, 5) / 5))) - rank((vwap - delay(vwap, 5)))) ", 49 | "48": "(indneutralize(((correlation(delta(close, 1), delta(delay(close, 1), 1), 250) * delta(close, 1)) / close), IndClass.subindustry) / sum(((delta(close, 1) / delay(close, 1))^2), 250)) ", 50 | "49": "(((((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)) < (-1 *0.1)) ? 1 : ((-1 * 1) * (close - delay(close, 1))))", 51 | "50": "(-1 * ts_max(rank(correlation(rank(volume), rank(vwap), 5)), 5)) ", 52 | "51": "(((((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)) < (-1 *0.05)) ? 
1 : ((-1 * 1) * (close - delay(close, 1)))) ", 53 | "52": "((((-1 * ts_min(low, 5)) + delay(ts_min(low, 5), 5)) * rank(((sum(returns, 240) - sum(returns, 20)) / 220))) * ts_rank(volume, 5)) ", 54 | "53": "(-1*delta((((close-low) - (high-close)) / (close-low)), 9))", 55 | "54": "((-1 * ((low - close) * (opens^5))) / ((low - high) * (close^5)))", 56 | "55": "(-1 * correlation(rank(((close - ts_min(low, 12)) / (ts_max(high, 12) - ts_min(low,12)))), rank(volume), 6))", 57 | "56": "(0 - (1 * (rank((sum(returns, 10) / sum(sum(returns, 2), 3))) * rank((returns * cap)))))", 58 | "57": "(0 - (1*((close - vwap)/decay_linear(rank(ts_argmax(close, 30)),2))))", 59 | "58": "(-1 * ts_rank(decay_linear(correlation(indneutralize(vwap, IndClass.sector), volume,3.92795), 7.89291), 5.50322)) ", 60 | "59": "(-1 * ts_rank(decay_linear(correlation(indneutralize(((vwap * 0.728317) + (vwap *(1 - 0.728317))), IndClass.industry), volume, 4.25197), 16.2289), 8.19648)) ", 61 | "60": "(0 - (1 * ((2 * scale(rank(((((close - low) - (high - close)) / (high - low)) * volume)))) -scale(rank(ts_argmax(close, 10)))))) ", 62 | "61": "(rank((vwap - ts_min(vwap, 16.1219))) < rank(correlation(vwap, adv180, 17.9282))) ", 63 | "62": "((rank(correlation(vwap, sum(adv20, 22.4101), 9.91009)) < rank(((rank(opens) + rank(opens)) < (rank(((high + low) / 2)) + rank(high))))) * -1) ", 64 | "63": "((rank(decay_linear(delta(indneutralize(close, IndClass.industry), 2.25164), 8.22237)) - rank(decay_linear(correlation(((vwap * 0.318108) + (opens * (1 - 0.318108))), sum(adv180,37.2467), 13.557), 12.2883))) * -1) ", 65 | "64": "((rank(correlation(sum(((opens * 0.178404) + (low * (1 - 0.178404))), 12.7054),sum(adv120, 12.7054), 16.6208)) < rank(delta(((((high + low) / 2) * 0.178404) + (vwap * (1 -0.178404))), 3.69741))) * -1) ", 66 | "65": "((rank(correlation(((opens * 0.00817205) + (vwap * (1 - 0.00817205))), sum(adv60,8.6911), 6.40374)) < rank((opens - ts_min(opens, 13.635)))) * -1) ", 67 | "66": "((rank(decay_linear(delta(vwap, 3.51013), 7.23052)) + ts_rank(decay_linear(((((low* 0.96633) + (low * (1 - 0.96633))) - vwap) / (opens - ((high + low) / 2))), 11.4157), 6.72611)) * -1) ", 68 | "67": "((rank((high - ts_min(high, 2.14593)))^rank(correlation(indneutralize(vwap,IndClass.sector), indneutralize(adv20, IndClass.subindustry), 6.02936))) * -1) ", 69 | "68": "((ts_rank(correlation(rank(high), rank(adv15), 8.91644), 13.9333) < rank(delta(((close * 0.518371) + (low * (1 - 0.518371))), 1.06157))) * -1) ", 70 | "69": "((rank(ts_max(delta(indneutralize(vwap, IndClass.industry), 2.72412),4.79344))^ts_rank(correlation(((close * 0.490655) + (vwap * (1 - 0.490655))), adv20, 4.92416),9.0615)) * -1) ", 71 | "70": "((rank(delta(vwap, 1.29456))^ts_rank(correlation(indneutralize(close,IndClass.industry), adv50, 17.8256), 17.9171)) * -1) ", 72 | "71": "max(ts_rank(decay_linear(correlation(ts_rank(close, 3.43976), ts_rank(adv180,12.0647), 18.0175), 4.20501), 15.6948), ts_rank(decay_linear((rank(((low + opens) - (vwap + vwap)))^2), 16.4662), 4.4388))", 73 | "72": "(rank(decay_linear(correlation(((high + low) / 2), adv40, 8.93345), 10.1519)) / rank(decay_linear(correlation(ts_rank(vwap, 3.72469), ts_rank(volume, 18.5188), 6.86671),2.95011))) ", 74 | "73": "(max(rank(decay_linear(delta(vwap, 4.72775), 2.91864)), ts_rank(decay_linear(((delta(((opens * 0.147155) + (low * (1 - 0.147155))), 2.03608) / ((opens * 0.147155) + (low * (1 - 0.147155)))) * -1), 3.33829), 16.7411)) * -1) ", 75 | "74": "((rank(correlation(close, sum(adv30, 37.4843), 15.1365)) 2 | 3 | 
[![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)](https://opensource.org/licenses/Apache-2.0) 4 | [![Python](https://img.shields.io/badge/Python-3.5|3.6-blue.svg)](https://opensource.org/licenses/Apache-2.0) 5 | [![Build Status](https://travis-ci.org/marketneutral/alphatools.svg?branch=master)](https://travis-ci.org/marketneutral/alphatools) 6 | 7 | This package aims to provide environments within which best-in-class open source tools across **both** financial research (e.g., `zipline`, `alphalens`, and `pyfolio`) and machine learning (e.g., `scikit-learn`, `LightGBM`, `PyMC3`, `pytorch`, and `fastai`) operate together. The "stable" environment is on Python 3.5 and does not include `fastai`. The "latest" environment is on Python 3.6 and relies on the backwards compatibility PEP for packages which state only 3.5 support (e.g., `zipline`). The latest environment includes the pre-release of PyTorch 1.0 and fastai 1.0.x. The PyTorch version in both environments is currently "CPU" only (i.e., no GPU/CUDA for now). For now, the "tests" only verify that the environments build without conflict. 8 | 9 | Additionally, this package provides functions to make the equity alpha factor research process more accessible and productive. Convenience functions sit on top of [zipline](https://github.com/quantopian/zipline) and, specifically, the [`Pipeline`](https://www.quantopian.com/help#pipeline-api) cross-sectional classes and functions in that package. `alphatools` allows you to 10 | 11 | - `run_pipeline` in a Jupyter notebook (or from any arbitrary Python code) **in your local environment**, 12 | - create `Pipeline` factors **at runtime** on **arbitrary data sources** (just expose the endpoint for data sitting somewhere, specify the schema, and...it's available for use in `Pipeline`!), 13 | - parse and compile **"expression" style alphas** as described in the paper ["101 Formulaic Alphas"](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2701346) into `Pipeline` factors, and 14 | - work with and plot ingested **pricing data from an arbitrary bundle** with a `get_pricing(...)` function call. 15 | 16 | For example, with `alphatools`, you can, within a Jupyter notebook, 17 | 18 | ```python 19 | from alphatools.research import run_pipeline 20 | from alphatools.ics import Sector 21 | from alphatools.data import Factory 22 | from alphatools.expression import ExpressionAlpha 23 | from zipline.pipeline.data import USEquityPricing as USEP 24 | from zipline.pipeline.factors import Returns, AverageDollarVolume 25 | from zipline.pipeline import Pipeline 26 | 27 | universe = AverageDollarVolume(window_length=120).top(500) 28 | 29 | my_factor = ( 30 |     -Returns(mask=universe, window_length=5). 31 |     demean(groupby=Sector()). 32 |     rank() 33 | ) 34 | 35 | expr_factor = ( 36 |     ExpressionAlpha( 37 |         'rank(indneutralize(-log(close/delay(close, 4)), IndClass.sector))' 38 |     ).make_pipeline_factor().pipeline_factor(mask=universe) 39 | ) 40 | 41 | p = Pipeline(screen=universe) 42 | 43 | p.add(my_factor, '5d_MR_Sector_Neutral_Rank') 44 | p.add(expr_factor, '5d_MR_Expression_Alpha') 45 | 46 | p.add(Factory['my_special_data'].value.latest.zscore(), 'Special_Factor') 47 | 48 | start_date = '2017-01-04' 49 | end_date = '2017-12-28' 50 | 51 | df = run_pipeline(p, start_date, end_date) 52 | ``` 53 | 
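Once `run_pipeline` returns a DataFrame of factor values, the plotting helpers defined in `alphatools/research/research.py` can summarize each factor column via `alphalens`. A minimal sketch, assuming `make_factor_plot` and `make_quantile_plot` are importable from `alphatools.research` the same way `run_pipeline` is:

```python
from alphatools.research import make_factor_plot, make_quantile_plot

# Cumulative long/short returns for each factor column in df
ls_returns = make_factor_plot(df, start_date, end_date)

# Mean daily return per factor quantile, in basis points per day
quantile_returns = make_quantile_plot(df, start_date, end_date)
```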
54 | ## Bring Your Own Data 55 | 56 | To "Bring Your Own Data", you simply point the Factory object to an endpoint and specify the schema. This is done by adding an entry to the `json` file `data_sources.json`. For example, if you have a `csv` file on disk, `data.csv`, and a PostgreSQL table somewhere else, you would create `data_sources.json` as 57 | 58 | ```json 59 | { 60 |     "my_special_data": { 61 |         "url": "/full/path/to/data/data.csv", 62 |         "schema": "var * {asof_date: datetime, sid: int64, value: float64}" 63 |     }, 64 |     "my_database_data": { 65 |         "url": "postgresql://$USER:$PASS@hostname::my-table-name", 66 |         "schema": "var * {asof_date: datetime, sid: int64, price_to_book: float64}" 67 |     } 68 | } 69 | ``` 70 | 71 | In the case of the example PostgreSQL `url`, note that the text `$USER` will be substituted with the text in the environment variable `USER` and the text `$PASS` will be substituted with the text in the environment variable `PASS`. In general, any text token in the `url` which is preceded by `$` will be substituted by the text in the environment variable of that name, so you do not need to expose actual credentials in this file (a minimal sketch of this substitution appears at the end of this section). 72 | 73 | The `schema` is specified as a `dshape` from the package `datashape` (see that package's documentation). The magic happens via the `blaze/datashape/odo` stack. You can specify the `url` to a huge variety of source formats including `json`, `csv`, PostgreSQL tables, MongoDB collections, `bcolz`, Microsoft Excel(!?), `.gz` compressed files, collections of files (e.g., `myfiles_*.csv`), and remote locations like Amazon S3 and a Hadoop Distributed File System. To me, the [`odo`](https://en.wikipedia.org/wiki/Odo_(Star_Trek)) [documentation on URI strings](http://odo.pydata.org/en/latest/uri.html) is the clearest explanation of this. 74 | 75 | Note that this data must be mapped to the `sid` as mapped by `zipline ingest`. Also, the per-row dates must be in a column titled `asof_date`. You can then access this data like 76 | 77 | ```python 78 | from alphatools.data import Factory 79 | : 80 | : 81 | : 82 | 83 | my_factor = Factory['my_database_data'].price_to_book.latest.rank() 84 | p.add(my_factor, 'Price_to_Book_Rank') 85 | ``` 86 | 87 | This functionality should allow you to use new data in research very quickly with an absolute minimum of data engineering and/or munging. For example, commercial risk model providers often deliver a single file per day of factor loadings (e.g., `data_yyyymmdd_fac.csv`). After `sid` mapping and converting the date column name to `asof_date`, this data can be immediately available in `Pipeline` by putting a `url` in `data_sources.json` like `"url": "/path/to/dir/data_*_fac.csv"`, and `schema` like `"var * {asof_date: datetime, sid: int64, MKT_BETA: float64, VALUE: float64, MOMENTUM: float64, ST_REVERSAL: float64 ..."`. 88 | 
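The credential substitution described above amounts to expanding `$TOKEN` markers in the `url` from the environment. The sketch below illustrates that behavior; it is not the actual `alphatools` implementation:

```python
import os
import re

def expand_env_tokens(url):
    """Replace each $TOKEN in a data-source URL with os.environ['TOKEN'].

    Unknown tokens are left as-is. Illustrative only; alphatools performs
    an equivalent substitution when it reads data_sources.json.
    """
    return re.sub(
        r'\$([A-Za-z_][A-Za-z0-9_]*)',
        lambda m: os.environ.get(m.group(1), m.group(0)),
        url,
    )

# With USER=alice and PASS=s3cret in the environment:
# expand_env_tokens('postgresql://$USER:$PASS@hostname::my-table-name')
# -> 'postgresql://alice:s3cret@hostname::my-table-name'
```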
88 | 89 | ## Expression Alphas 90 | 91 | The ability to parse "expression" alphas is meant to help speed the research process and/or allow financial professionals with minimal Python experience to test alpha ideas. See ["101 Formulaic Alphas"](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2701346) for details on this DSL. The (EBNF) grammar is fully specified [here](https://github.com/marketneutral/alphatools/blob/master/alphatools/expression/expression.lark). We use the `Lark` Python [parsing library](https://github.com/lark-parser/lark) (great name, no relation). Currently, the data for `open`, `high`, `low`, `close`, and `volume` are accessible; the following calculations and operators are implemented 92 | 93 | * `vwap`: the daily vwap (as a default, this is approximated with `(close + (opens + high + low)/3)/2`). 94 | * `returns`: daily close-to-close returns. 95 | * `+`, `-`, `*`, `/`, `^`: as expected, though only for two terms at a time (i.e., only `term op term`); `^` is exponentiation, not bitwise XOR. 96 | * `-x`: unary minus on x (i.e., negation). 97 | * `abs(x)`, `log(x)`, `sign(x)`: elementwise standard math operations. 98 | * `>`, `<`, `==`, `||`: elementwise comparison operations returning 1 or 0. 99 | * `x ? y : z`: C-style ternary operator; `if x: y; else: z`. 100 | * `rank(x)`: scaled ranks, per day, across all assets (i.e., the cross-sectional rank); ranks are ascending, such that the rank of the maximum raw value in the vector is 1.0 and the smallest rank is 1/N. The re-scaling of the ranks to the interval [1/N, 1] is implied by Alpha #1, in which 0.50 is subtracted from the final ranked value. The ordinal method is used to match the `Pipeline` method `.rank()`. 101 | * `delay(x, days)`: *x* lagged by *days*. Note that the *days* parameter in `delay` and `delta` differs from the `window_length` parameter you may be familiar with in `Pipeline`. The `window_length` refers to the number of data points in the (row axis of the) data matrix, *not* the number of days of lag. For example, in `Pipeline`, if you want daily returns, you specify a `window_length` of `2`, since you need 2 data points--today and the day prior--to get a daily return. In an expression alpha, *days* is the lag *from today*. Concretely, the `Pipeline` factor `Returns(window_length=2)` is precisely equal to the expression alpha `delta(close, 1)/delay(close, 1)`. 102 | * `correlation(x, y, days)`: the Pearson correlation of the values for assets in *x* to the corresponding values for the same assets in *y* over *days*; note this is very slow in the current implementation. 103 | * `covariance(x, y, days)`: the covariance of the values for assets in *x* to the corresponding values for the same assets in *y* over *days*; note this is also very slow currently. 104 | * `delta(x, days)`: the difference between *x* today and *x* *days* ago (i.e., `x - delay(x, days)`). 105 | * `signedpower(x, a)`: elementwise `sign(x)*(abs(x)^a)`. 106 | * `decay_linear(x, days)`: weighted sum of *x* over the past *days* with linearly decaying weights (the weights sum to 1; the largest weight is on the most recent day); see the short weight sketch just after this list. 107 | * `indneutralize(x, g)`: `x`, cross-sectionally "neutralized" (i.e., demeaned) against the group membership classifier `g`. `g` must be in the set {`IndClass.sector`, `IndClass.industry`, `IndClass.subindustry`}. The set `g` maps to the `Pipeline` classifiers `Sector()` and `SubIndustry()` in `alphatools.ics`. Concretely, the `Pipeline` factor `Returns().demean(groupby=Sector())` is equivalent (save a corner case in NaN treatment) to the expression `indneutralize(returns, IndClass.sector)`. If you do not specifically pass a token for `g`, the default of `IndClass.industry` is applied. 108 | * `ts_max(x, days)`: the per asset time series max on *x* over the trailing *days* (also `ts_min(...)`). 109 | * `max(a, b)`: The paper says that `max` is an alias for `ts_max(a, b)`; I think this is an error. Alphas 71, 73, 76, 87, and 96 do not parse with `max` as an alias for `ts_max`. Rather, I believe that `max` means the elementwise maximum of two arrays (i.e., like `pmax(...)` in R and `np.maximum(...)` in NumPy) and have implemented it as such; the same goes for `min(a, b)`. 110 | * `ts_argmax(x, days)`: on which day `ts_max(x, days)` occurred (also `ts_argmin(...)`), scaled to the interval [1/days, 1]. For example, if the window (*days*) is 10 days and the max is in the most recent day, it will return 1.0; if the max is in the earliest day, it will return 0.10. 111 | * `ts_rank(x, days)`: the time series rank per asset on *x* over the trailing *days*. Currently this is in the range [0,1], but it should be [1/days, 1]. 112 | * `sum(x, days)`: the sum per asset on *x* over the trailing *days*. 113 | * `product(x, days)`: the product per asset on *x* over the trailing *days*. 114 | * `stddev(x, days)`: the standard deviation per asset on *x* over the trailing *days*. 115 | * `adv{days}`: the average daily **dollar** volume per asset over the trailing *days* (e.g., `adv20` gives the 20-day trailing average daily dollar volume).
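To make the `decay_linear` weighting concrete, here is a small sketch of how the linearly decaying weights can be constructed (this mirrors the code the parser generates; the most recent observation is the last element of the window):

```python
import numpy as np

days = 5
weights = (np.arange(days) + 1.0) / np.sum(np.arange(days) + 1.0)
# array([0.0667, 0.1333, 0.2, 0.2667, 0.3333]) -- sums to 1.0, with the
# largest weight on the most recent (last) observation in the window
```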
116 | 117 | The expression alpha parser produces `zipline`-compatible `Pipeline` factor code. This implementation makes use of the `bottleneck` package, which provides many `numpy`-style rolling aggregations implemented in highly optimized compiled C code. The `bottleneck` package is distributed in binary form in the Anaconda Python distribution (see Installation below). 118 | 119 | For example, the expression alpha "#9" from the paper 120 | 121 | ``` 122 | ((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ? delta(close, 1) : (-1 * delta(close, 1)))) 123 | ``` 124 | 125 | is compiled into a usable `Pipeline` factor, `e`, as 126 | 127 | ```python 128 | e = ( 129 |     ExpressionAlpha('((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ? delta(close, 1) : (-1 * delta(close, 1))))'). 130 |     make_pipeline_factor(). 131 |     pipeline_factor(mask=universe) 132 | ) 133 | ``` 134 | 135 | 136 | The abstract syntax tree ("AST") can be visualized with `from lark.tree import pydot__tree_to_png; pydot__tree_to_png(e.tree, "alpha9.png")`: 137 | 138 | ![The AST for expression alpha #9](notebooks/alpha9.png) 139 | 140 | This is quite helpful, in my opinion, for understanding a third-party alpha like this. So what's happening? Looking top to bottom at each level, left to right: if zero is less than the minimum of the daily price change over the trailing five days (i.e., if the stock has gone **up** *every day* for the last five days), then the factor value is simply the price change over the *most recent* day, which is a positive number by definition; the factor thus bets that positive momentum will continue. That branch should be pretty rare (meaning it would be rare for a stock to go up every day for five days in a row). Otherwise, we check if the max price change in the last 5 days is less than zero (i.e., the stock has gone **down** *every day* for the last 5 days); if so, the factor value again is just the price change over the *most recent* day, which is a negative number by definition. Thus, if the stock has gone straight down for 5 days, the factor bets that it will continue down. This should also be rare. Lastly, if neither of these two states exists, the factor value is just -1 times the last day's price change, i.e., a bet on mean reversion. Hence, by inspecting the parse tree like this, we can understand that this alpha is a momentum/mean-reversion switching factor: it assumes momentum will persist if the prior five days have moved in the same direction; otherwise it assumes mean-reversion will occur.
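For a single asset, the switching rule just described can be sketched in plain Python (illustrative only; `d` stands in for the trailing five daily price changes, newest last):

```python
import numpy as np

d = np.array([0.5, -0.2, 0.1, 0.3, -0.4])  # five trailing daily price changes

if d.min() > 0:      # up every day for five days: bet that momentum continues
    alpha = d[-1]    # positive by definition
elif d.max() < 0:    # down every day for five days: bet that momentum continues
    alpha = d[-1]    # negative by definition
else:                # mixed days: bet on mean reversion
    alpha = -d[-1]
```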
141 | 142 | You can see the resulting `Pipeline` code (though this is not necessary to use the alpha in `run_pipeline`) with `print(e.pipeline_code)`: 143 | 144 | ```python 145 | class ExprAlpha_1(CustomFactor): 146 |     inputs = [USEP.close] 147 |     window_length = 17 148 | 149 |     def compute(self, today, assets, out, close): 150 |         v0 = close - np.roll(close, 1, axis=0) 151 |         v1 = bn.move_min(v0, window=5, min_count=1, axis=0) 152 |         v2 = np.less(0, v1) 153 |         v3 = close - np.roll(close, 1, axis=0) 154 |         v4 = close - np.roll(close, 1, axis=0) 155 |         v5 = bn.move_max(v4, window=5, min_count=1, axis=0) 156 |         v6 = np.less(v5, 0) 157 |         v7 = close - np.roll(close, 1, axis=0) 158 |         v8 = close - np.roll(close, 1, axis=0) 159 |         v9 = 1*v8 160 |         v10 = -v9 161 |         v11 = np.where(v6, v7, v10) 162 |         v12 = np.where(v2, v3, v11) 163 |         out[:] = v12[-1] 164 | ``` 165 | 166 | There is no compile-time optimization of the AST at all! What happens is that the compiler walks down the AST and converts each node into an equivalent Python (`numpy`, `bottleneck`, and/or `pandas`) expression, keeping track of the call stack so that future references to prior calculations are correct. The resulting Python code is in the style of "three-address code". There is, of course, plenty of optimization which could be done. 167 | 168 | Note that there is no reference implementation of the expression-style alpha syntax to test against, and that many specific details are lacking in the paper. As such, this implementation makes some assumptions where necessary (as a simple example, the paper does not specify whether `rank` is ascending or descending; however, it obviously should be ascending, as a larger raw value should produce a larger numerical rank to keep the alpha vector *directly* proportional). This is experimental and I have created only a handful of tests. 169 | 170 | ### Using Your Own Data in Expression Alphas 171 | 172 | It is also possible to use the "bring your own data" functionality provided by the `Factory` object in an expression alpha. This is done with one or more `factory` expressions. The syntax is 173 | 174 | * `factory("<name>")`: where `"<name>"` is the name you would pass into the `Factory` object (for now, the data is assumed to be in a column called "value"). Concretely, if you have a dataset, "sample", defined in the `data_sources.json` file, you can access it in an expression as: 175 | 176 | ``` 177 | (returns > 0) ? factory("sample") : -sum(returns, 5) 178 | ``` 179 | 180 | This compiles to the `Pipeline` factor: 181 | 182 | ```python 183 | class ExprAlpha_1(CustomFactor): 184 |     inputs = [Returns(window_length=2), Factory["sample"].value] 185 |     window_length = 7 186 | 187 |     def compute(self, today, assets, out, returns, factory0): 188 |         v0 = np.greater(returns, 0) 189 |         v1 = pd.DataFrame(data=returns).rolling( 190 |             window=5, center=False, min_periods=1).sum().values 191 |         v2 = -v1 192 |         v3 = np.where(v0, factory0, v2) 193 |         out[:] = v3[-1] 194 | ``` 195 | 196 | 197 | ## Installation 198 | 199 | Run the following, in order: 200 | 201 | ``` 202 | git clone https://github.com/marketneutral/alphatools 203 | cd alphatools 204 | ./install_stable.sh 205 | zipline ingest 206 | ``` 207 | 208 | Note that when you run `zipline ingest`, the security master is built from scratch and each `sid` is assigned at that time. You must re-map the `Sector` and `Industry` classifiers in this package **and all your own data** after every `zipline ingest`. You can map the `Sector` and `Industry` classifiers with 209 | 210 | ``` 211 | alphatools ingest 212 | ``` 213 |
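For your own data, the re-mapping step amounts to looking up the newly assigned `sid` for each ticker. A minimal sketch using the bundle's asset finder (the same calls appear in the included notebooks; the ticker list here is hypothetical):

```python
from zipline.data import bundles

bundle_data = bundles.load('quandl')
assets = bundle_data.asset_finder.lookup_symbols(['AAPL', 'MSFT'], as_of_date=None)
ticker_to_sid = {asset.symbol: asset.sid for asset in assets}
# use ticker_to_sid to rewrite the sid column of your own data files
```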
214 | 215 | ## A Word on Sector and Industry Classifiers Included 216 | 217 | Sector and industry data were scraped from Yahoo Finance on September 18, 2017 for the full Quandl WIKI universe at that time. The SIC and CIK codes were scraped from [Rank and Filed](http://rankandfiled.com/) on September 15, 2017. The classifiers built from this data assume that the codes have never changed and never will change (i.e., there is no concept of an asset being reclassified over time). **Be aware that there is lookahead bias in this.** A good example of why: Corning, Inc. is classified as a Technology/Electronic Components company in this dataset, but from 1851 until the 2000s(?) it was actually classified as a boring industrial glass company; the economic make-up of the company changed sometime in the early 1990s, when optical fiber production became an important revenue driver, and later with iPhone glass. At some point the ICS providers changed the classification from "boring" to "high tech", but this surely lagged the actual transformation of the company; hence...lookahead bias. 218 | 219 | ## A Word on Fundamental Data 220 | 221 | Although there is a `Fundamentals` factor included, there is no fundamental data included in the package. This factor was built on top of the `DataFrameLoader` to get a `pandas.DataFrame` into a factor. I think I will deprecate this in favor of using the `Factory` object as described above. In the meantime, the `Fundamentals` pipeline factors can be built from `make_fundamentals.py` with your own data. Note that these factors use the `DataFrameLoader`, which means the data must fit in memory. 222 | 223 | ## Disclaimer 224 | 225 | Though this is in the `LICENSE` file, it bears noting that this software is provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 226 | 227 | Additionally, nothing in this package constitutes investment advice. This package is a personal project and nothing in its functionality or examples is reflective of any past or current employer. 228 | 229 | Lastly, there are no automated tests (or any significant tests, for that matter), no automated nightly build, no docstrings, nor any of the other features associated with what you might consider a well-supported open source package. 230 | 231 | ## Contributing 232 | 233 | I hope you enjoy this package. Please leave feedback, or better, contribute. If you are planning to make a PR, please get in touch with me before you do any work, as I have a project plan.
I am figuring this out as I go and could use help, especially with (in order) 234 | 235 | - Incorporating `six` so that the package works with Python 3.x and Python 2.7 236 | - Creating tests and using Travis CI on this repo 237 | - Python packaging 238 | - Dockerizing this thing so we can avoid the painful install process 239 | -------------------------------------------------------------------------------- /alphatools/expression/expression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import autopep8 3 | import itertools 4 | from lark import Lark, Transformer 5 | from os import path 6 | from scipy.stats import rankdata 7 | from six import iteritems, next 8 | 9 | 10 | class MyTransformer(Transformer): 11 | 12 |     def __init__(self): 13 |         self.cmdlist = [] 14 |         self.window = 2 15 |         self.vcounter = itertools.count() 16 |         self.stack = [] 17 | 18 |         self.imports = set() 19 | 20 |         self.factory_counter = itertools.count() 21 |         self.factories = dict() 22 | 23 |         self.inputs = dict() 24 | 25 | 26 |     def factory(self, items): 27 |         self.imports.add('from alphatools.data import Factory') 28 |         this_factory = next(self.factory_counter) 29 |         self.stack.append('factory' + str(this_factory)) 30 |         self.factories[this_factory] = items[0] 31 |         self.inputs['factory'+str(this_factory)] = 'Factory['+items[0]+'].value' 32 | 33 |     def neg(self, items): 34 |         term1 = self.stack.pop() 35 |         thisv = next(self.vcounter) 36 |         self.stack.append('v' + str(thisv)) 37 |         self.cmdlist.append( 38 |             'v' + str(thisv) + ' = -' + term1 39 |         ) 40 | 41 |     def rank(self, items): 42 |         self.imports.add("from scipy.stats import rankdata") 43 |         term1 = self.stack.pop() 44 |         v1 = next(self.vcounter) 45 |         self.cmdlist.append( 46 |             'v' + str(v1) + ' = np.apply_along_axis(rankdata, 1, ' + term1 +', method="ordinal")' 47 |         ) 48 |         v2 = next(self.vcounter) 49 |         self.stack.append('v' + str(v2)) 50 |         self.cmdlist.append( 51 |             'v' + str(v2) + ' = np.divide(v'+str(v1)+'.astype(float), np.sum(~np.isnan(v'+str(v1)+'), axis=1).reshape(v'+str(v1)+'.shape[0], 1))' 52 |         ) 53 | 54 | 55 |     # def close(self, items): 56 |     #     thisv = self.vcounter.next() 57 |     #     self.stack.append('v' + str(thisv)) 58 |     #     self.cmdlist.append( 59 |     #         'v' + str(thisv) + ' = close' 60 |     #     ) 61 | 62 |     def cap(self, items): 63 |         thisv = next(self.vcounter) 64 |         self.stack.append('v' + str(thisv)) 65 |         self.cmdlist.append( 66 |             'v' + str(thisv) + ' = 1.0' 67 |         ) 68 | 69 |     def number(self, items): 70 |         #import pdb; pdb.set_trace() 71 |         self.stack.append(str(items[0].value)) 72 |         pass 73 | 74 |     def close(self, items): 75 |         self.imports.add('from zipline.pipeline.data import USEquityPricing as USEP') 76 |         self.inputs['close'] = 'USEP.close' 77 |         self.stack.append('close') 78 | 79 |     def high(self, items): 80 |         self.imports.add('from zipline.pipeline.data import USEquityPricing as USEP') 81 |         self.inputs['high'] = 'USEP.high' 82 |         self.stack.append('high') 83 | 84 |     def low(self, items): 85 |         self.imports.add('from zipline.pipeline.data import USEquityPricing as USEP') 86 |         self.inputs['low'] = 'USEP.low' 87 |         self.stack.append('low') 88 | 89 |     def volume(self, items): 90 |         self.imports.add('from zipline.pipeline.data import USEquityPricing as USEP') 91 |         self.inputs['volume'] = 'USEP.volume' 92 |         self.stack.append('volume') 93 |
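    # Illustrative walk-through (comments only; nothing here is executed):
    # for the expression '-close', the `close` handler above pushes the input
    # name 'close' onto the stack; `neg` then pops it, emits the line
    # 'v0 = -close' into cmdlist, and pushes 'v0' so that an enclosing
    # expression can refer to the intermediate result.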
94 |     def vwap(self, items): 95 |         self.imports.add('from zipline.pipeline.data import USEquityPricing as USEP') 96 |         self.inputs['close'] = 'USEP.close' 97 |         self.inputs['opens'] = 'USEP.open' 98 |         self.inputs['high'] = 'USEP.high' 99 |         self.inputs['low'] = 'USEP.low' 100 | 101 |         thisv = next(self.vcounter) 102 |         self.stack.append('v' + str(thisv)) 103 |         self.cmdlist.append( 104 |             'v' + str(thisv) + ' = (close + (opens + high + low)/3)/2' 105 |         ) 106 | 107 |     def adv(self, items): 108 |         self.imports.add('from zipline.pipeline.data import USEquityPricing as USEP') 109 |         self.inputs['close'] = 'USEP.close' 110 |         self.inputs['volume'] = 'USEP.volume' 111 |         thisv = next(self.vcounter) 112 |         self.stack.append('v' + str(thisv)) 113 |         self.window = max([self.window, int(items[0])+2]) 114 |         self.cmdlist.append( 115 |             'v' + str(thisv) + ' = bn.move_mean(np.multiply(close, volume), window=' + items[0] + ', min_count=1, axis=0)' 116 |         ) 117 |     # def opens(self, items): 118 |     #     thisv = self.vcounter.next() 119 |     #     self.stack.append('v' + str(thisv)) 120 |     #     self.cmdlist.append( 121 |     #         'v' + str(thisv) + ' = opens' 122 |     #     ) 123 | 124 |     def opens(self, items): 125 |         self.imports.add('from zipline.pipeline.data import USEquityPricing as USEP') 126 |         self.inputs['opens'] = 'USEP.open' 127 |         self.stack.append('opens') 128 | 129 |     def div(self, items): 130 |         term2 = self.stack.pop() 131 |         term1 = self.stack.pop() 132 |         thisv = next(self.vcounter) 133 |         self.stack.append('v' + str(thisv)) 134 |         self.cmdlist.append( 135 |             'v' + str(thisv) + ' = ' + term1 + ' / ' + term2 136 |         ) 137 | 138 |     def min(self, items): 139 |         # TODO: check that this is parallel min 140 |         term2 = self.stack.pop() 141 |         term1 = self.stack.pop() 142 |         thisv = next(self.vcounter) 143 |         self.stack.append('v' + str(thisv)) 144 |         self.cmdlist.append( 145 |             'v' + str(thisv) + ' = np.minimum('+term1 + ', ' + term2+')' 146 |         ) 147 | 148 |     def max(self, items): 149 |         # TODO: check that this is parallel max 150 |         # paper says this is == ts_max, but that doesn't parse for alpha 71 151 |         term2 = self.stack.pop() 152 |         term1 = self.stack.pop() 153 |         thisv = next(self.vcounter) 154 |         self.stack.append('v' + str(thisv)) 155 |         self.cmdlist.append( 156 |             'v' + str(thisv) + ' = np.maximum('+term1 + ', ' + term2+')' 157 |         ) 158 | 159 |     def powerof(self, items): 160 |         """ Element-wise power """ 161 | 162 |         term2 = self.stack.pop() 163 |         term1 = self.stack.pop() 164 |         thisv = next(self.vcounter) 165 |         self.stack.append('v' + str(thisv)) 166 |         self.cmdlist.append( 167 |             'v' + str(thisv) + ' = np.power(' + term1 + ', ' + term2 + ')' 168 |         ) 169 | 170 |     def signedpower(self, items): 171 |         """ np.sign(term1)*np.power(np.abs(term1), term2) """ 172 | 173 |         term2 = self.stack.pop() 174 |         term1 = self.stack.pop() 175 |         thisv = next(self.vcounter) 176 |         self.stack.append('v' + str(thisv)) 177 |         self.cmdlist.append( 178 |             'v' + str(thisv) + ' = np.sign('+term1+')*np.power(np.abs(' + term1 + '), ' + term2 + ')' 179 |         ) 180 | 181 | 182 |     def minus(self, items): 183 |         term2 = self.stack.pop() 184 |         term1 = self.stack.pop() 185 |         thisv = next(self.vcounter) 186 |         self.stack.append('v' + str(thisv)) 187 |         self.cmdlist.append( 188 |             'v' + str(thisv) + ' = ' + term1 + ' - ' + term2 189 |         ) 190 | 191 |     def plus(self, items): 192 |         term2 = self.stack.pop() 193 |         term1 = self.stack.pop() 194 |         thisv = next(self.vcounter) 195 |         self.stack.append('v' + str(thisv)) 196 |         self.cmdlist.append( 197 |             'v' + str(thisv) + ' = ' + term1 + ' + ' + term2 198 |         ) 199 |
200 |     def mult(self, items): 201 |         term2 = self.stack.pop() 202 |         term1 = self.stack.pop() 203 |         thisv = next(self.vcounter) 204 |         self.stack.append('v' + str(thisv)) 205 |         self.cmdlist.append( 206 |             'v' + str(thisv) + ' = ' + term1 + '*' + term2 207 |         ) 208 | 209 |     def log(self, items): 210 |         term1 = self.stack.pop() 211 |         thisv = next(self.vcounter) 212 |         self.stack.append('v' + str(thisv)) 213 |         self.cmdlist.append( 214 |             'v' + str(thisv) + ' = np.log(' + term1 + ')' 215 |         ) 216 | 217 |     def abs(self, items): 218 |         term1 = self.stack.pop() 219 |         thisv = next(self.vcounter) 220 |         self.stack.append('v' + str(thisv)) 221 |         self.cmdlist.append( 222 |             'v' + str(thisv) + ' = np.abs(' + term1 + ')' 223 |         ) 224 | 225 |     def sign(self, items): 226 |         term1 = self.stack.pop() 227 |         thisv = next(self.vcounter) 228 |         self.stack.append('v' + str(thisv)) 229 |         self.cmdlist.append( 230 |             'v' + str(thisv) + ' = np.sign(' + term1 + ')' 231 |         ) 232 | 233 |     def scale(self, items): 234 |         # TODO: 101 paper says scaled sum(abs)==a; silent on mean 235 |         term1 = self.stack.pop() 236 |         thisv = next(self.vcounter) 237 |         self.stack.append('v' + str(thisv)) 238 |         self.cmdlist.append( 239 |             'v' + str(thisv) + ' = np.apply_along_axis(lambda x: (x - np.nanmean(x))/np.nansum(np.abs(x - np.nanmean(x))), 1, ' + term1 +')' 240 |         ) 241 | 251 |     def greaterthan(self, items): 252 |         term2 = self.stack.pop() 253 |         term1 = self.stack.pop() 254 |         thisv = next(self.vcounter) 255 |         self.stack.append('v' + str(thisv)) 256 |         self.cmdlist.append( 257 |             'v' + str(thisv) + ' = np.greater(' + term1 + ', ' + term2 + ')' 258 |         ) 259 | 260 |     def lessthan(self, items): 261 |         term2 = self.stack.pop() 262 |         term1 = self.stack.pop() 263 |         thisv = next(self.vcounter) 264 |         self.stack.append('v' + str(thisv)) 265 |         self.cmdlist.append( 266 |             'v' + str(thisv) + ' = np.less(' + term1 + ', ' + term2 + ')' 267 |         )
268 | 269 |     def equals(self, items): 270 |         # TODO: do we want np.isclose or np.allclose? 271 |         term2 = self.stack.pop() 272 |         term1 = self.stack.pop() 273 |         thisv = next(self.vcounter) 274 |         self.stack.append('v' + str(thisv)) 275 |         self.cmdlist.append( 276 |             'v' + str(thisv) + ' = np.isclose(' + term1 + ', ' + term2 + ')' 277 |         ) 278 | 279 |     def logicalor(self, items): 280 |         term2 = self.stack.pop() 281 |         term1 = self.stack.pop() 282 |         thisv = next(self.vcounter) 283 |         self.stack.append('v' + str(thisv)) 284 |         self.cmdlist.append( 285 |             'v' + str(thisv) + ' = np.logical_or(' + term1 + ', ' + term2 + ')' 286 |         ) 287 | 288 |     def ternary(self, items): 289 |         term3 = self.stack.pop() 290 |         term2 = self.stack.pop() 291 |         term1 = self.stack.pop() 292 |         thisv = next(self.vcounter) 293 |         self.stack.append('v' + str(thisv)) 294 |         self.cmdlist.append( 295 |             'v' + str(thisv) + ' = np.where(' + term1 + ', ' + term2 + ', ' + term3 + ')' 296 |         ) 297 | 298 |     def returns(self, items): 299 |         self.imports.add("from zipline.pipeline.factors import Returns") 300 |         self.inputs['returns'] = 'Returns(window_length=2)' 301 |         self.stack.append('returns') 302 |         #thisv = self.vcounter.next() 303 |         #self.window = self.window+1 304 |         #self.stack.append('v' + str(thisv)) 305 |         #self.cmdlist.append( 306 |         #    'v' + str(thisv) + ' = np.log(close/np.roll(close, 1, axis=0))' 307 |         #) 308 | 309 | 310 |     def delta(self, items): 311 |         term1 = self.stack.pop() 312 |         thisv = next(self.vcounter) 313 |         self.window = self.window+int(items[1]) 314 |         self.stack.append('v' + str(thisv)) 315 |         self.cmdlist.append( 316 |             'v' + str(thisv) + ' = '+term1+' - np.roll(' + term1 + ', ' + items[1] + ', axis=0)' 317 |         ) 318 | 319 |     def delay(self, items): 320 |         term1 = self.stack.pop() 321 |         thisv = next(self.vcounter) 322 |         self.window = self.window+int(items[1]) 323 |         self.stack.append('v' + str(thisv)) 324 |         self.cmdlist.append( 325 |             'v' + str(thisv) + ' = np.roll(' + term1 + ', ' + items[1] + ', axis=0)' 326 |         ) 327 | 328 |     def ts_max(self, items): 329 |         v1 = self.stack.pop() 330 |         thisv = next(self.vcounter) 331 |         self.window = self.window + int(items[1]) 332 |         self.stack.append('v' + str(thisv)) 333 |         self.cmdlist.append( 334 |             'v' + str(thisv) + ' = bn.move_max(' + v1 + ', window=' + items[1] + ', min_count=1, axis=0)' 335 |         ) 336 | 337 |     def ts_min(self, items): 338 |         v1 = self.stack.pop() 339 |         thisv = next(self.vcounter) 340 |         self.window = self.window + int(items[1]) 341 |         self.stack.append('v' + str(thisv)) 342 |         self.cmdlist.append( 343 |             'v' + str(thisv) + ' = bn.move_min(' + v1 + ', window=' + items[1] + ', min_count=1, axis=0)' 344 |         ) 345 | 346 |     def ts_argmax(self, items): 347 |         """ 348 |         The behavior of `move_argmax` and associated functions in Numpy 349 |         and Bottleneck is that they index based on the shape of the array. 350 |         In this case the time increases along the 0 axis so, if the window is 351 |         10 days, and the max is in the most recent day, it will return 9; 352 |         if the max is in the earliest day it will return zero. I add "1" to 353 |         this imagining a multiplier, and do not want zero to kill values. 354 |         It is then rescaled to the interval [1/days, 1] to match the `rank` style. 355 |         """ 356 |         v1 = self.stack.pop() 357 |         thisv = next(self.vcounter) 358 |         self.window = self.window + int(items[1]) 359 |         self.stack.append('v' + str(thisv)) 360 |         self.cmdlist.append( 361 |             'v' + str(thisv) + ' = (1.
+ bn.move_argmax(' + v1 + ', window=' + items[1] + ', min_count=1, axis=0))/' + items[1] 362 | ) 363 | 364 | def ts_argmin(self, items): 365 | v1 = self.stack.pop() 366 | thisv = next(self.vcounter) 367 | self.window = self.window + int(items[1]) 368 | self.stack.append('v' + str(thisv)) 369 | self.cmdlist.append( 370 | 'v' + str(thisv) + ' = (1. + bn.move_argmin(' + v1 + ', window=' + items[1] + ', min_count=1, axis=0))/' + items[1] 371 | ) 372 | 373 | def ts_rank(self, items): 374 | # Returns ranks 1-N; largest value is rank N 375 | # `bn.move_rank` returns values in the range -1 to 1.0, so we add 1 376 | # to get 0-2 and then divide by 2.0 to get [0,1] 377 | # note that we want [1/N, 1] 378 | v1 = self.stack.pop() 379 | thisv = next(self.vcounter) 380 | self.window = self.window + int(items[1]) 381 | self.stack.append('v' + str(thisv)) 382 | self.cmdlist.append( 383 | 'v' + str(thisv) + ' = (1. + bn.move_rank(' + v1 + ', window=' + items[1] + ', min_count=1, axis=0))/2.0' 384 | ) 385 | 386 | def stddev(self, items): 387 | # check that the day is what we want 388 | v1 = self.stack.pop() 389 | thisv = next(self.vcounter) 390 | self.window = self.window + int(items[1]) 391 | self.stack.append('v' + str(thisv)) 392 | self.cmdlist.append( 393 | 'v' + str(thisv) + ' = bn.move_std(' + v1 + ', window=' + items[1] + ', min_count=1, axis=0)' 394 | ) 395 | 396 | def sum(self, items): 397 | v1 = self.stack.pop() 398 | thisv = next(self.vcounter) 399 | self.window = self.window + int(items[1]) 400 | self.stack.append('v' + str(thisv)) 401 | self.cmdlist.append( 402 | 'v' + str(thisv) + ' = pd.DataFrame(data='+v1+').rolling(window='+items[1]+', center=False, min_periods=1).sum().values' 403 | ) 404 | 405 | def product(self, items): 406 | v1 = self.stack.pop() 407 | thisv = next(self.vcounter) 408 | self.window = self.window + int(items[1]) 409 | self.stack.append('v' + str(thisv)) 410 | self.cmdlist.append( 411 | 'v' + str(thisv) + ' = pd.DataFrame(data='+v1+').rolling(window='+items[1]+', center=False, min_periods=1).apply(lambda x: np.prod(x)).values' 412 | ) 413 | 414 | def correlation(self, items): 415 | v2 = self.stack.pop() 416 | v1 = self.stack.pop() 417 | thisv = next(self.vcounter) 418 | self.window = self.window + int(items[2]) 419 | self.stack.append('v' + str(thisv)) 420 | self.cmdlist.append( 421 | 'v' + str(thisv) + ' = pd.DataFrame('+v1+').rolling(window='+items[2]+', min_periods='+items[2]+').corr(other=pd.DataFrame('+v2+')).values' 422 | ) 423 | 424 | def covariance(self, items): 425 | v2 = self.stack.pop() 426 | v1 = self.stack.pop() 427 | thisv = next(self.vcounter) 428 | self.window = self.window + int(items[2]) 429 | self.stack.append('v' + str(thisv)) 430 | self.cmdlist.append( 431 | 'v' + str(thisv) + ' = pd.DataFrame('+v1+').rolling(window='+items[2]+', min_periods='+items[2]+').cov(other=pd.DataFrame('+v2+')).values' 432 | ) 433 | 434 | def decay_linear(self, items): 435 | v1 = self.stack.pop() 436 | thisv = next(self.vcounter) 437 | days = int(items[1]) 438 | self.window = self.window + days 439 | v2 = 'v'+str(thisv) 440 | self.cmdlist.append( 441 | v2 + ' = (np.arange(' + items[1] + ')+1.)/np.sum(np.arange(' + items[1]+ ')+1.)' 442 | ) 443 | thisv = next(self.vcounter) 444 | self.stack.append('v' + str(thisv)) 445 | 446 | self.cmdlist.append( 447 | 'v' + str(thisv) + ' = pd.DataFrame(data='+v1+').rolling(window='+items[1]+', center=False, min_periods='+items[1]+').apply(lambda x: (x*'+v2+').sum()).values' 448 | ) 449 | 450 | def indneutralize(self, items): 451 | """ 452 | 
De-means a data matrix, data, DxN, D days in rows x N stocks in 453 |         columns, by group means. 454 | 455 |         The group means come from Pipeline Classifiers: Sector() and 456 |         SubIndustry(). These are integer values per stock; -1 for missing. 457 | 458 |         The Classifier produces a matrix of window_length x N. We need the last 459 |         slice of this, assuming that the data is constant per day. 460 | 461 |         We set up a factor indicator matrix, OHE, like a one-hot-encoded 462 |         matrix. 463 | 464 |         # set up OHE matrix; add 1 so that missing now == 0 465 |         OHE = np.zeros((N, classifier.max()+2)) 466 |         OHE[np.arange(N), classifier[-1] + 1] = 1 467 | 468 |         # The per day (rows) by per industry (columns) mean is 469 |         per_day_per_ind_mean = data.dot(OHE)/OHE.sum(axis=0) 470 | 471 |         # The per day (rows) per *asset* (column) mean then is 472 |         per_day_per_asset_ind_mean = per_day_per_ind_mean.dot(OHE.T) 473 | 474 |         Finally, the de-meaned data matrix is simply calculated as 475 | 476 |         data = data - per_day_per_asset_ind_mean 477 |         """ 478 |         self.imports.add("from alphatools.ics import Sector, SubIndustry") 479 |         self.inputs['sector'] = 'Sector()' 480 |         self.inputs['subindustry'] = 'SubIndustry()' 481 | 482 |         groupmap = { 483 |             'IndClass.subindustry': 'subindustry', 484 |             'IndClass.sector': 'sector', 485 |             'IndClass.industry': 'subindustry', 486 |         } 487 | 488 |         v1 = self.stack.pop() 489 |         if len(items)<2: 490 |             groupby = 'IndClass.subindustry' 491 |         else: 492 |             groupby = str(items[1]) 493 | 494 |         group_label = groupmap[groupby] 495 | 496 |         # set up ICS matrix (like one-hot-encoded matrix); we add 1 to the 497 |         # ics scheme bc -1 is a missing, so increment all by 1 498 |         ohe = 'v' + str(next(self.vcounter)) 499 |         self.cmdlist.append( 500 |             ohe + ' = np.zeros(('+group_label+'.shape[1], '+group_label+'.max()+2))' 501 |         ) 502 |         self.cmdlist.append( 503 |             ohe + '[np.arange('+group_label+'.shape[1]), '+group_label+'[-1] + 1] = 1' 504 |         ) 505 | 506 |         # get industry mean, per industry on columns, per day on rows 507 |         # and the dot(ohe.T) gives per stock industry mean 508 |         ind_mean = 'v' + str(next(self.vcounter)) 509 |         self.cmdlist.append( 510 |             ind_mean + ' = (np.nan_to_num('+v1+'.dot('+ohe+')/'+ohe+'.sum(axis=0))).dot('+ohe+'.T)' 511 |         ) 512 | 513 |         thisv = next(self.vcounter) 514 |         self.stack.append('v' + str(thisv)) 515 |         # subtract the per stock industry mean 516 |         self.cmdlist.append( 517 |             'v' + str(thisv) + ' = '+v1+' - '+ind_mean 518 |         ) 519 | 520 | 521 |     def transform(self, tree): 522 |         self._transform_tree(tree) 523 |         v1 = self.stack.pop() 524 |         self.cmdlist.append( 525 |             'out[:] = ' + v1 + '[-1]' 526 |         ) 527 |         return self 528 |         #return ["window_length = "+str(self.window)] + self.cmdlist 529 | 530 |
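# Illustrative example (comments only): for the expression
# 'close/delay(opens, 1)', MyTransformer above emits roughly
#     v0 = np.roll(opens, 1, axis=0)
#     v1 = close / v0
#     out[:] = v1[-1]
# which ExpressionAlpha below wraps into a CustomFactor subclass.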
531 | class ExpressionAlpha(): 532 | 533 |     def __init__(self, expr_string): 534 |         self.expr_string = expr_string 535 |         self.code = "" 536 |         fname = path.join(path.dirname(__file__), 'expression.lark') 537 |         with open(fname, 'r') as grammar_file: 538 |             self.grammar = grammar_file.read() 539 | 540 |     def make_pipeline_factor(self): 541 |         self.parse() 542 |         self.transform() 543 |         self.generate_pipeline_code() 544 |         exec(self.imports, globals(), globals()) 545 |         exec(self.pipeline_code, globals(), globals()) 546 |         self.pipeline_factor = ExprAlpha_1 547 |         return self 548 | 549 |     def parse(self): 550 |         my_parser = Lark(self.grammar, start='value') 551 |         self.tree = my_parser.parse(self.expr_string) 552 |         return self 553 | 554 |     def transform(self): 555 |         self.transformed = MyTransformer().transform(self.tree) 556 |         return self 557 | 558 |     def generate_pipeline_code(self): 559 |         raw_np_list = \ 560 |             ["window_length = "+str(self.transformed.window)] + \ 561 |             self.transformed.cmdlist 562 |         raw_imports = \ 563 |             self.transformed.imports 564 | 565 |         (data_names, factor_names) = zip(*iteritems(self.transformed.inputs)) 566 | 567 |         self.imports = ['{0}\n'.format(imp) for imp in raw_imports] 568 |         self.imports.append("from zipline.pipeline.factors import CustomFactor\n") 569 |         self.imports.append("import numpy as np\n") 570 |         self.imports.append("import bottleneck as bn\n") 571 |         self.imports.append("import pandas as pd\n") 572 |         self.imports = ["from __future__ import division\n"] + \ 573 |             self.imports 574 | 575 |         self.code = ["class ExprAlpha_1(CustomFactor):"] 576 | 577 |         self.code.append("    inputs = [" + ', '.join(factor_names) + "]") 578 |         self.code.append('    {0}'.format(raw_np_list[0])) 579 |         self.code.append("    def compute(self, today, assets, out, " + ', '.join(data_names) + "):") 580 |         lst = ['        {0}'.format(elem) for elem in raw_np_list] 581 | 582 |         self.code = self.code + lst[1:] 583 | 584 |         self.imports = ''.join(self.imports) 585 | 586 |         self.code_string = '\n'.join(self.code) 587 |         self.pipeline_code = autopep8.fix_code(self.code_string) 588 |         return self 589 | 590 | if __name__ == '__main__': 591 |     e = ExpressionAlpha('close/delay(opens,1)') 592 |     e.make_pipeline_factor() 593 |     print(e.pipeline_code) 594 | -------------------------------------------------------------------------------- /notebooks/pipeline-blaze-minimal.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |  "cells": [ 3 |   { 4 |    "cell_type": "code", 5 |    "execution_count": 1, 6 |    "metadata": {}, 7 |    "outputs": [], 8 |    "source": [ 9 |     "import blaze as bz\n", 10 |     "import numpy as np\n", 11 |     "import pandas as pd\n", 12 |     "import sqlite3\n", 13 |     "import itertools\n", 14 |     "\n", 15 |     "from zipline.data import bundles\n", 16 |     "from zipline.utils.calendars import get_calendar\n", 17 |     "\n", 18 |     "from zipline.pipeline import Pipeline\n", 19 |     "from zipline.pipeline.data import USEquityPricing\n", 20 |     "from zipline.pipeline.data import DataSet\n", 21 |     "from zipline.pipeline.engine import SimplePipelineEngine\n", 22 |     "from zipline.pipeline.filters import StaticAssets\n", 23 |     "from zipline.pipeline.loaders import USEquityPricingLoader\n", 24 |     "from zipline.pipeline.loaders.blaze import BlazeLoader, from_blaze\n", 25 |     "\n", 26 |     "\n", 27 |     "trading_calendar = get_calendar('NYSE')\n", 28 |     "bundle_data = bundles.load('quandl')\n", 29 |     "\n", 30 |     "# spoof some data\n", 31 |     "\n", 32 |     "np.random.seed(100)\n", 33 |     "\n", 34 |     "start = trading_calendar.closes.index.get_loc('2016-01-04 00:00:00+00:00')\n", 35 |     "end = trading_calendar.closes.index.get_loc('2018-08-06 00:00:00+00:00')\n", 36 |     "\n", 37 |     "#dates = list(trading_calendar.closes.index)[start:end]\n", 38 |     "dates = trading_calendar.closes.index[start:end]\n", 39 |     "sids = bundle_data.asset_finder.sids\n", 40 |     "\n", 41 |     "df = pd.DataFrame(\n", 42 |     "    data={'value': np.random.random(size=len(dates)*len(sids))},\n", 43 |     "    index = pd.MultiIndex.from_tuples(list(itertools.product(dates,sids)), names=('asof_date', 'sid'))\n", 44 |     ")" 45 |    ] 46 |   }, 47 |   { 48 |    "cell_type": "code", 49 |    "execution_count": 2, 50 |    "metadata": {}, 51 |    "outputs": [], 52 |    "source": [ 53 |     "df = df.reset_index()" 54 |    ] 55 |   }, 56 |   { 57 |    "cell_type": "code", 58 |    "execution_count": 3, 59 |    "metadata": {}, 60 |    "outputs": [], 61 |    "source": [ 62 |     "# this is
necessary because sqlite3 doesn't like it if we have the time\n", 63 | "df.asof_date = df.asof_date.dt.date" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/html": [ 74 | "
\n", 75 | "\n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | "
asof_datesidvalue
02016-01-0400.543405
12016-01-0410.278369
22016-01-0420.424518
32016-01-0430.844776
42016-01-0440.004719
\n", 117 | "
" 118 | ], 119 | "text/plain": [ 120 | " asof_date sid value\n", 121 | "0 2016-01-04 0 0.543405\n", 122 | "1 2016-01-04 1 0.278369\n", 123 | "2 2016-01-04 2 0.424518\n", 124 | "3 2016-01-04 3 0.844776\n", 125 | "4 2016-01-04 4 0.004719" 126 | ] 127 | }, 128 | "execution_count": 4, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "df.head()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# put into sqlite db\n", 144 | "df.to_sql('ds_table', con=sqlite3.connect('temp.db'), if_exists='replace', index=False)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 6, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "[('2016-01-04', 0, 0.5434049417909654), ('2016-01-04', 1, 0.27836938509379616), ('2016-01-04', 2, 0.4245175907491331), ('2016-01-04', 3, 0.8447761323199037), ('2016-01-04', 4, 0.004718856190972565), ('2016-01-04', 5, 0.12156912078311422), ('2016-01-04', 6, 0.6707490847267786), ('2016-01-04', 7, 0.8258527551050476), ('2016-01-04', 8, 0.13670658968495297), ('2016-01-04', 9, 0.57509332942725)]\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "con = sqlite3.connect('temp.db')\n", 162 | "cursor = con.cursor()\n", 163 | "cursor.execute(\"SELECT * FROM ds_table LIMIT 10\")\n", 164 | "print(cursor.fetchall())" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 7, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "from datashape import dshape" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 8, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "ds_dshape = dshape(\"var*{asof_date: datetime, sid: int64, value: float64}\")" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 9, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "dshape(\"var * {asof_date: datetime, sid: int64, value: float64}\")" 194 | ] 195 | }, 196 | "execution_count": 9, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "ds_dshape" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 10, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "# create the blaze expr\n", 212 | "\n", 213 | "expr = bz.Data(\n", 214 | " 'sqlite:///temp.db::ds_table',\n", 215 | " dshape=ds_dshape\n", 216 | ")" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 11, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/plain": [ 227 | "dshape(\"var * {asof_date: datetime, sid: int64, value: float64}\")" 228 | ] 229 | }, 230 | "execution_count": 11, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "expr.dshape" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 12, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "data": { 246 | "text/html": [ 247 | "
\n", 248 | "\n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | "
asof_datesidvalue
02016-01-0400.543405
12016-01-0410.278369
22016-01-0420.424518
32016-01-0430.844776
42016-01-0440.004719
52016-01-0450.121569
62016-01-0460.670749
72016-01-0470.825853
82016-01-0480.136707
92016-01-0490.575093
102016-01-04100.891322
\n", 326 | "
" 327 | ], 328 | "text/plain": [ 329 | " asof_date sid value\n", 330 | "0 2016-01-04 0 0.543405\n", 331 | "1 2016-01-04 1 0.278369\n", 332 | "2 2016-01-04 2 0.424518\n", 333 | "3 2016-01-04 3 0.844776\n", 334 | "4 2016-01-04 4 0.004719\n", 335 | "5 2016-01-04 5 0.121569\n", 336 | "6 2016-01-04 6 0.670749\n", 337 | "7 2016-01-04 7 0.825853\n", 338 | "8 2016-01-04 8 0.136707\n", 339 | "9 2016-01-04 9 0.575093\n", 340 | "10 2016-01-04 10 0.891322" 341 | ] 342 | }, 343 | "execution_count": 12, 344 | "metadata": {}, 345 | "output_type": "execute_result" 346 | } 347 | ], 348 | "source": [ 349 | "expr.peek()" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 13, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "dshape(\"{asof_date: datetime, sid: int64, value: float64}\")" 361 | ] 362 | }, 363 | "execution_count": 13, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "expr.schema" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 14, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "# create and empty BlazeLoader\n", 379 | "my_blaze_loader = BlazeLoader()\n", 380 | "\n", 381 | "# create the DataSet\n", 382 | "ds = from_blaze(\n", 383 | " expr,\n", 384 | " no_deltas_rule='ignore',\n", 385 | " no_checkpoints_rule='ignore',\n", 386 | " loader=my_blaze_loader,\n", 387 | " missing_values={'index':-1}\n", 388 | ")" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 15, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "data": { 398 | "text/plain": [ 399 | "True" 400 | ] 401 | }, 402 | "execution_count": 15, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [ 408 | "issubclass(ds, DataSet)" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 16, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "" 420 | ] 421 | }, 422 | "execution_count": 16, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "my_blaze_loader" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 17, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "pipeline_loader = USEquityPricingLoader(\n", 438 | " bundle_data.equity_daily_bar_reader,\n", 439 | " bundle_data.adjustment_reader,\n", 440 | ")" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": 18, 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [ 449 | "def choose_loader(column):\n", 450 | " if column in USEquityPricing.columns:\n", 451 | " return pipeline_loader\n", 452 | " else:\n", 453 | " return my_blaze_loader" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 19, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "engine = SimplePipelineEngine(\n", 463 | " get_loader=choose_loader,\n", 464 | " calendar=trading_calendar.all_sessions,\n", 465 | " asset_finder=bundle_data.asset_finder,\n", 466 | ")" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 20, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "assets = bundle_data.asset_finder.lookup_symbols(['A', 'AAL'], as_of_date=None)" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 21, 481 | "metadata": {}, 482 | "outputs": [], 483 | "source": [ 484 | "p = Pipeline(\n", 485 | " columns={\n", 
486 | " 'price': USEquityPricing.close.latest,\n", 487 | " 'col_A': ds.value.latest,\n", 488 | " },\n", 489 | " screen=StaticAssets(assets)\n", 490 | ")" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 22, 496 | "metadata": {}, 497 | "outputs": [ 498 | { 499 | "data": { 500 | "image/svg+xml": [ 501 | "\n", 502 | "\n", 503 | "G\n", 504 | "\n", 505 | "\n", 506 | "cluster_Output\n", 507 | "\n", 508 | "Output\n", 509 | "\n", 510 | "\n", 511 | "cluster_Input\n", 512 | "\n", 513 | "Input\n", 514 | "\n", 515 | "\n", 516 | "\n", 517 | "5004224496\n", 518 | "\n", 519 | "StaticAssets\n", 520 | "\n", 521 | "\n", 522 | "\n", 523 | "5004224048\n", 524 | "\n", 525 | "Latest\n", 526 | "\n", 527 | "\n", 528 | "\n", 529 | "5004223264\n", 530 | "\n", 531 | "Latest\n", 532 | "\n", 533 | "\n", 534 | "\n", 535 | "4623743016\n", 536 | "\n", 537 | "BoundColumn:\n", 538 | "  Dataset: USEquityPricing\n", 539 | "  Column: close\n", 540 | "\n", 541 | "\n", 542 | "\n", 543 | "4623743016->5004224048\n", 544 | "\n", 545 | "\n", 546 | "\n", 547 | "\n", 548 | "\n", 549 | "4626226368\n", 550 | "\n", 551 | "BoundColumn:\n", 552 | "  Dataset: BlazeDataSet_0\n", 553 | "  Column: value\n", 554 | "\n", 555 | "\n", 556 | "\n", 557 | "4626226368->5004223264\n", 558 | "\n", 559 | "\n", 560 | "\n", 561 | "\n", 562 | "" 563 | ], 564 | "text/plain": [ 565 | "" 566 | ] 567 | }, 568 | "execution_count": 22, 569 | "metadata": {}, 570 | "output_type": "execute_result" 571 | } 572 | ], 573 | "source": [ 574 | "p.show_graph()" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": 23, 580 | "metadata": {}, 581 | "outputs": [ 582 | { 583 | "ename": "TypeError", 584 | "evalue": "Cannot change data-type for object array.", 585 | "output_type": "error", 586 | "traceback": [ 587 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 588 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 589 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimestamp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'2016-01-05'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtz\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utc'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimestamp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'2018-01-04'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtz\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utc'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m )\n", 590 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/zipline/pipeline/engine.py\u001b[0m in \u001b[0;36mrun_pipeline\u001b[0;34m(self, pipeline, start_date, end_date)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0mdates\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[0massets\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 311\u001b[0;31m \u001b[0minitial_workspace\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 312\u001b[0m )\n\u001b[1;32m 313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 591 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/zipline/pipeline/engine.py\u001b[0m in 
\u001b[0;36mcompute_chunk\u001b[0;34m(self, graph, dates, assets, initial_workspace)\u001b[0m\n\u001b[1;32m 522\u001b[0m \u001b[0mloader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_loader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mterm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m loaded = loader.load_adjusted_array(\n\u001b[0;32m--> 524\u001b[0;31m \u001b[0mto_load\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask_dates\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0massets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 525\u001b[0m )\n\u001b[1;32m 526\u001b[0m assert set(loaded) == set(to_load), (\n", 592 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/zipline/pipeline/loaders/blaze/core.py\u001b[0m in \u001b[0;36mload_adjusted_array\u001b[0;34m(self, columns, dates, assets, mask)\u001b[0m\n\u001b[1;32m 891\u001b[0m self.pool.imap_unordered(\n\u001b[1;32m 892\u001b[0m \u001b[0mpartial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_load_dataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdates\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0massets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 893\u001b[0;31m \u001b[0mitervalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgetitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_table_expressions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 894\u001b[0m ),\n\u001b[1;32m 895\u001b[0m )\n", 593 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/toolz/dicttoolz.py\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(*dicts, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0mrv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfactory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0md\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdicts\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 39\u001b[0m \u001b[0mrv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mrv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 594 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/zipline/pipeline/loaders/blaze/core.py\u001b[0m in \u001b[0;36m_load_dataset\u001b[0;34m(self, dates, assets, mask, columns)\u001b[0m\n\u001b[1;32m 985\u001b[0m \u001b[0massets\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 986\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 987\u001b[0;31m \u001b[0mall_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 988\u001b[0m )\n\u001b[1;32m 989\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 595 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/zipline/pipeline/loaders/blaze/_core.pyx\u001b[0m in 
\u001b[0;36mzipline.pipeline.loaders.blaze._core.adjusted_arrays_from_rows_with_assets\u001b[0;34m()\u001b[0m\n", 596 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/zipline/pipeline/loaders/blaze/_core.pyx\u001b[0m in \u001b[0;36mzipline.pipeline.loaders.blaze._core.adjusted_arrays_from_rows_with_assets\u001b[0;34m()\u001b[0m\n", 597 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/zipline/pipeline/loaders/blaze/_core.pyx\u001b[0m in \u001b[0;36mzipline.pipeline.loaders.blaze._core.arrays_from_rows_with_assets\u001b[0;34m()\u001b[0m\n", 598 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/zipline/pipeline/loaders/blaze/_core.pyx\u001b[0m in \u001b[0;36mzipline.pipeline.loaders.blaze._core.arrays_from_rows\u001b[0;34m()\u001b[0m\n", 599 | "\u001b[0;32m/anaconda3/envs/testenv/lib/python3.5/site-packages/numpy/core/_internal.py\u001b[0m in \u001b[0;36m_view_is_safe\u001b[0;34m(oldtype, newtype)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnewtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhasobject\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0moldtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhasobject\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 367\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Cannot change data-type for object array.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 368\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 600 | "\u001b[0;31mTypeError\u001b[0m: Cannot change data-type for object array." 601 | ] 602 | } 603 | ], 604 | "source": [ 605 | "df = engine.run_pipeline(\n", 606 | " p,\n", 607 | " pd.Timestamp('2016-01-05', tz='utc'),\n", 608 | " pd.Timestamp('2018-01-04', tz='utc')\n", 609 | ")" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": null, 615 | "metadata": {}, 616 | "outputs": [], 617 | "source": [ 618 | "df.head()" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": null, 624 | "metadata": {}, 625 | "outputs": [], 626 | "source": [] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": null, 638 | "metadata": {}, 639 | "outputs": [], 640 | "source": [] 641 | } 642 | ], 643 | "metadata": { 644 | "kernelspec": { 645 | "display_name": "Python 3.5 (env_alphatools)", 646 | "language": "python", 647 | "name": "env_alphatools" 648 | }, 649 | "language_info": { 650 | "codemirror_mode": { 651 | "name": "ipython", 652 | "version": 3 653 | }, 654 | "file_extension": ".py", 655 | "mimetype": "text/x-python", 656 | "name": "python", 657 | "nbconvert_exporter": "python", 658 | "pygments_lexer": "ipython3", 659 | "version": "3.5.5" 660 | } 661 | }, 662 | "nbformat": 4, 663 | "nbformat_minor": 2 664 | } 665 | -------------------------------------------------------------------------------- /notebooks/model.txt: -------------------------------------------------------------------------------- 1 | tree 2 | version=v2 3 | num_class=1 4 | num_tree_per_iteration=1 5 | label_index=0 6 | max_feature_idx=27 7 | objective=binary sigmoid:1 8 | feature_names=feature_0 feature_1 feature_2 feature_3 feature_4 feature_5 feature_6 feature_7 feature_8 feature_9 
feature_10 feature_11 feature_12 feature_13 feature_14 feature_15 feature_16 feature_17 feature_18 feature_19 feature_20 feature_21 feature_22 feature_23 feature_24 feature_25 feature_26 feature_27 9 | feature_infos=[0.27500000000000002:6.6950000000000003] [-2.4169999999999998:2.4300000000000002] [-1.7430000000000001:1.7430000000000001] [0.019:5.7000000000000002] [-1.7430000000000001:1.7430000000000001] [0.159:4.1900000000000004] [-2.9410000000000003:2.9700000000000002] [-1.7409999999999999:1.7409999999999999] [0:2.173] [0.19:5.1929999999999996] [-2.9039999999999999:2.9089999999999998] [-1.742:1.7430000000000001] [0:2.2149999999999999] [0.26400000000000001:6.5229999999999997] [-2.7280000000000002:2.7269999999999999] [-1.742:1.742] [0:2.548] [0.36499999999999999:6.0679999999999996] [-2.4950000000000001:2.496] [-1.74:1.7430000000000001] [0:3.1019999999999999] 1:0:2:3:4:5:6:7 [0.41899999999999998:7.3920000000000003] [0.46100000000000002:3.6819999999999999] [0.38400000000000001:6.5829999999999993] [0.093000000000000013:7.8600000000000003] [0.38900000000000001:4.5430000000000001] [0.48899999999999999:4.3159999999999998] 10 | tree_sizes=2473 2514 2494 2493 2501 2521 2510 2503 2510 2516 11 | 12 | Tree=0 13 | num_leaves=31 14 | num_cat=0 15 | split_feature=25 26 27 25 25 24 22 5 5 27 26 27 25 26 27 24 3 25 13 3 22 19 5 24 4 4 25 24 13 24 16 | split_gain=258.038 165.11 154.742 63.219 70.8123 52.7563 84.2546 36.0199 33.6019 27.9057 27.8139 26.5869 26.0761 22.1221 21.9403 33.7759 17.9554 17.3962 16.275 15.194 14.467 15.3902 14.2355 13.4469 12.7354 13.7208 12.6664 12.7077 12.328 14.4412 17 | threshold=1.2815000000000001 0.78050000000000008 0.90450000000000019 0.98450000000000015 0.6785000000000001 1.0265000000000002 1.0445000000000002 0.82550000000000001 1.0195000000000003 0.9225000000000001 0.86950000000000016 0.78850000000000009 0.62350000000000005 0.88850000000000018 0.88950000000000007 1.1225000000000003 1.9205000000000003 0.98950000000000016 1.3835000000000004 1.8335000000000004 0.94750000000000012 -0.96449999999999991 1.3365000000000002 0.71850000000000014 1.2535000000000001 -0.27149999999999996 0.58650000000000013 0.74550000000000016 0.96150000000000013 0.75150000000000006 18 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 19 | left_child=1 7 3 4 28 6 -4 23 13 22 11 -6 -7 -5 20 -16 17 -14 -19 -17 -2 -22 26 -1 25 -8 27 -9 29 -3 20 | right_child=14 2 5 8 10 12 24 9 -10 -11 -12 -13 16 -15 15 19 -18 18 -20 -21 21 -23 -24 -25 -26 -27 -28 -29 -30 -31 21 | leaf_value=0.10284758317169626 0.081717548115204597 0.16920083703122518 0.061477323984852811 0.079260885438702716 0.18080904101407977 0.099309317207388309 0.12110846920445478 0.12408276028202063 0.17475193328198171 0.017008281489650057 0.1969212081555923 0.1235008337668447 0.16066263555873586 0.13517987794167297 0.05173771107426077 0.10127986202068245 0.10275151646010305 0.11491813432626971 0.17550668759677757 0.045185775908694936 0.063338584813271967 0.15085137998011328 0.17902887439915816 0.074439733286902621 0.085450775035909657 0.16644963581922742 0.14411108005613313 0.055249166772639538 0.16659762686134424 0.11481669961729626 22 | leaf_count=283 121 62 307 129 277 161 135 75 112 22 751 115 457 158 422 324 96 203 57 57 26 90 57 409 44 133 139 42 102 234 23 | internal_value=0 0.217752 0.42333 0.803342 0.987405 -0.0394609 -0.496888 -0.48946 0.0924624 0.0498958 1.24088 0.808221 0.251245 -0.270801 -0.88835 -1.04647 0.397149 0.506114 0.0923076 -0.613956 -0.35262 0.153016 0.203224 -0.750565 0.236446 0.400485 0.00157742 
-0.484247 0.259469 0.0524468 24 | internal_count=5600 4560 3533 1940 1541 1593 619 1027 399 335 1143 392 974 287 1040 803 813 717 260 381 237 116 313 692 312 268 256 117 398 296 25 | shrinkage=1 26 | 27 | 28 | Tree=1 29 | num_leaves=31 30 | num_cat=0 31 | split_feature=25 26 25 25 3 5 22 5 26 22 22 26 22 3 26 24 9 25 3 5 24 26 19 13 9 19 24 5 23 3 32 | split_gain=232.567 149.461 79.7137 118.312 38.3157 33.9073 19.959 19.4367 21.805 18.5965 19.0683 17.7145 20.2141 15.8739 14.2707 13.2396 12.7568 12.7083 12.4665 12.2694 11.595 11.5527 11.8451 11.5454 11.2504 10.1043 9.87802 9.37985 9.34468 10.6218 33 | threshold=1.2815000000000001 0.77650000000000008 0.62350000000000005 0.98950000000000016 1.6175000000000004 0.82550000000000001 1.0445000000000002 0.88150000000000006 1.0375000000000003 1.0025000000000002 0.84950000000000014 0.97650000000000003 0.78450000000000009 0.97650000000000003 0.94950000000000012 0.71850000000000014 0.89250000000000018 2.0390000000000006 1.1435000000000002 1.5755000000000001 1.7450000000000003 1.1205000000000003 0.62900000000000011 0.73250000000000004 0.81850000000000012 1.0425000000000002 1.1005000000000003 0.98050000000000004 0.98050000000000004 0.33650000000000008 34 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 35 | left_child=1 5 19 4 7 15 11 18 -9 10 -5 24 -13 -7 27 25 -15 -8 -4 -3 23 -16 -23 -6 -2 28 -14 -11 -1 -30 36 | right_child=6 2 3 9 20 13 17 8 -10 14 -12 12 26 16 21 -17 -18 -19 -20 -21 -22 22 -24 -25 -26 -27 -28 -29 29 -31 37 | leaf_value=-0.070541453406693988 -0.063694696339596971 -0.018268219940631856 0.040536711276372622 0.0050468732160738579 0.041073763562594706 0.020651352864842279 -0.011240886118261467 0.065302003278970011 0.032851254148682729 -0.048367080735581769 -0.034450629006536941 -0.0053636137701500485 -0.077843794406284653 -0.049414992210901412 0.046233340678247803 -0.048147811369761845 0.016529617086431816 -0.054394845292876338 0.012504728853908589 0.032712218632545885 -0.056054581121086335 -0.014206068541919782 0.048014347247125215 -0.0035552662953019934 0.00058016734174086166 -0.05701248666597733 -0.049041183946400627 0.032225489254395945 0.056549891419987078 -0.012066252716263076 38 | leaf_count=23 39 534 688 216 89 198 258 636 310 53 284 61 300 79 123 399 47 93 208 52 28 102 44 169 91 58 198 20 26 174 39 | internal_value=0 0.207189 0.400353 0.5339 0.786129 -0.474962 -0.842165 0.892352 1.09303 -0.101684 -0.347898 -1.04008 -1.19477 0.0589292 0.258551 -0.728684 -0.497018 -0.453583 0.680513 -0.274894 0.103404 0.471952 0.0907344 0.236521 -0.374138 -0.395616 -1.32798 -0.527041 -0.201945 -0.0629231 40 | internal_count=5600 4560 3556 2970 2128 1004 1040 1842 946 842 500 689 559 324 342 680 126 351 896 586 286 269 146 258 130 281 498 73 223 200 41 | shrinkage=0.05 42 | 43 | 44 | Tree=2 45 | num_leaves=31 46 | num_cat=0 47 | split_feature=27 26 24 22 5 3 5 22 27 26 26 22 22 22 24 26 26 27 5 18 18 27 24 22 1 24 4 26 13 19 48 | split_gain=148.117 184.435 89.3492 94.5529 40.4413 27.3787 27.2311 19.0733 22.2821 18.7714 16.1379 13.5399 13.5162 18.567 13.3548 13.0963 13.0848 12.6295 12.4813 11.7266 11.1861 10.885 10.6951 12.1689 10.6911 10.6851 10.5366 15.4816 10.235 10.0934 49 | threshold=0.90250000000000019 0.81150000000000011 1.0265000000000002 1.0445000000000002 0.82050000000000012 1.9205000000000003 0.89650000000000007 1.0355000000000001 0.83850000000000013 1.3665 1.2195000000000003 1.5530000000000002 1.0465000000000002 0.74450000000000016 0.78250000000000008 0.89850000000000019 0.77250000000000008 1.0885000000000002 
1.3665 -0.80349999999999988 0.037500000000000012 0.73350000000000015 0.73550000000000015 0.8015000000000001 -0.50249999999999984 0.71850000000000014 -1.2844999999999998 0.86050000000000015 0.78050000000000008 0.8035000000000001 50 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 51 | left_child=1 4 3 17 16 12 7 8 26 14 24 -11 13 -4 28 -10 25 22 -6 -13 21 -18 -2 -24 -7 29 27 -3 -5 -1 52 | right_child=2 6 5 9 18 10 -8 -9 15 11 -12 19 -14 -15 -16 -17 20 -19 -20 -21 -22 -23 23 -25 -26 -27 -28 -29 -30 -31 53 | leaf_value=-0.0073029196712544242 -0.016483738266246096 -0.056226662966493993 0.037243245973663699 -0.010522150734305409 0.0026461442993008974 0.042245029951626607 0.046440568748069885 0.04858761207873491 -0.04773094810436717 -0.084102271262559564 -0.065655930649870728 -0.063081051195993595 0.015762579862828231 -0.011458033620908884 -0.0058759188038779945 0.0046124893907736877 0.0083286333310169571 -0.086782249315688301 0.050838468825565433 0.0097202305841271319 0.025447545611885669 -0.058886351658876616 -0.020895762195453083 -0.072301035629168051 -0.033948366204301507 -0.046187366594338752 0.031795753626302992 0.0324895757065886 0.060645280643654702 -0.049344488016781651 54 | leaf_count=189 58 36 90 29 327 30 934 224 68 41 98 34 526 630 203 164 47 207 65 64 74 50 56 261 48 370 483 44 68 82 55 | internal_value=0 0.298664 -0.358922 -0.810713 -0.317348 -0.0338865 0.680672 0.453822 0.308105 -0.107649 -0.772754 -0.71612 0.070829 -0.107589 0.174807 -0.215088 -0.571915 -1.33898 0.212204 -0.311304 -0.0786672 -0.526581 -1.11994 -1.26446 -0.0928599 -0.702852 0.524037 -0.150543 0.787251 -0.400718 56 | internal_count=5600 3157 2443 1021 1204 1422 1953 1019 795 439 176 139 1246 720 300 232 812 582 392 98 171 97 375 317 78 641 563 80 97 271 57 | shrinkage=0.05 58 | 59 | 60 | Tree=3 61 | num_leaves=31 62 | num_cat=0 63 | split_feature=25 26 27 25 25 25 26 24 22 27 5 27 22 27 3 5 25 3 5 22 17 6 5 4 25 13 24 27 24 3 64 | split_gain=205.442 146.989 120.56 48.7787 42.9311 39.2895 31.1871 29.0385 55.8713 27.4702 25.904 24.2132 24.1528 24.5602 22.82 21.5809 17.496 15.6602 14.9168 14.0717 13.142 12.1895 11.6379 11.5599 13.7375 10.9054 12.5602 10.4004 10.2465 10.172 65 | threshold=1.0565000000000002 0.78050000000000008 0.90450000000000019 0.6785000000000001 1.4915 0.62350000000000005 0.86950000000000016 1.0065000000000002 0.99950000000000017 0.78250000000000008 0.82550000000000001 0.75650000000000006 1.0025000000000002 1.0445000000000002 1.6740000000000002 0.91550000000000009 0.94850000000000012 1.8335000000000004 1.2445000000000002 0.73650000000000004 2.1615000000000006 -1.2374999999999998 1.5755000000000001 -0.039499999999999993 0.81350000000000011 0.96150000000000013 0.75150000000000006 0.9225000000000001 1.2035000000000002 0.45550000000000007 66 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 67 | left_child=1 10 3 25 12 22 9 8 -7 16 29 21 13 15 18 -2 -5 20 -9 -17 -6 -12 -4 24 -11 26 -3 -13 -21 -1 68 | right_child=4 2 5 6 17 7 -8 14 -10 23 11 27 -14 -15 -16 19 -18 -19 -20 28 -22 -23 -24 -25 -26 -27 -28 -29 -30 -31 69 | leaf_value=-0.0082033028811009587 -0.03599909263529838 0.041746635640685148 -0.039580301392174347 0.058125446833881649 -0.047005132938931764 -0.057212826088312366 0.062685920491323349 0.023242950454383147 0.021379314442893989 0.03458350647962026 -0.0472128223059173 -0.020425259197421095 0.0050919136779366547 -0.064092400066437114 -0.013299083017745733 0.071779011675729365 -0.011395179058987818 -0.096962996070842092 0.062990739000158127 
-0.017861215058779942 0.015256644821202901 0.029991875111881828 0.019987201999956301 0.02216413692018528 -0.061614855385568858 0.039569321922273241 -0.0091237530592508161 -0.097929980712076425 0.037995357051269753 -0.038162993370534026 70 | leaf_count=145 247 62 248 246 599 156 854 358 217 21 23 82 361 142 131 30 43 60 130 123 36 196 38 75 52 102 234 22 45 522 71 | internal_value=0 0.259168 0.483726 0.837842 -0.58416 0.0197737 1.02885 0.209041 -0.232428 0.591867 -0.408289 0.0584268 -0.30659 -0.557134 0.476091 -0.325475 0.954461 -0.961879 0.676464 0.167993 -0.869502 0.43756 -0.633747 -0.1107 -0.679159 0.224196 0.0295668 -0.737286 -0.0574093 -0.633103 72 | internal_count=5600 3957 2967 1689 1643 1278 1291 992 373 437 990 323 948 587 619 445 289 695 488 198 635 219 286 148 73 398 296 104 168 667 73 | shrinkage=0.05 74 | 75 | 76 | Tree=4 77 | num_leaves=31 78 | num_cat=0 79 | split_feature=25 26 27 25 25 25 26 24 22 27 5 27 22 27 3 5 25 3 5 22 24 17 0 25 6 5 11 5 14 26 80 | split_gain=185.695 133.172 109.85 44.7552 38.7872 35.4665 28.7556 26.2881 48.8607 25.0437 23.3658 21.8754 21.7989 22.1947 19.956 19.4566 16.0033 14.3464 12.8726 12.8084 12.7319 16.2341 14.2999 12.7886 11.0086 10.8517 10.7611 10.4892 10.4273 10.1204 81 | threshold=1.0565000000000002 0.78050000000000008 0.90450000000000019 0.6785000000000001 1.4915 0.62350000000000005 0.86950000000000016 1.0185000000000002 0.99950000000000017 0.78250000000000008 0.82550000000000001 0.75650000000000006 1.0025000000000002 1.0445000000000002 1.6740000000000002 0.91550000000000009 0.94850000000000012 1.8335000000000004 1.2445000000000002 0.73650000000000004 1.3545000000000003 1.5825000000000002 1.1075000000000002 2.0390000000000006 -1.2374999999999998 1.2105000000000001 -0.39449999999999991 1.5755000000000001 0.88850000000000018 0.84050000000000014 82 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 83 | left_child=1 10 3 -3 12 27 9 8 -7 16 -1 24 13 15 18 -2 -5 20 -9 -17 22 -22 23 25 -12 -6 -16 -4 -11 -14 84 | right_child=4 2 5 6 17 7 -8 14 -10 28 11 -13 29 -15 26 19 -18 -19 -20 -21 21 -23 -24 -25 -26 -27 -28 -29 -30 -31 85 | leaf_value=-0.03007466406260563 -0.034171014563527391 0.010667027800453588 -0.037567210194892987 0.055687579969788049 -0.041920232142950868 -0.053424147836170721 0.060126088198485833 0.02248874373378508 0.019554118609331331 -0.020358898894108236 -0.044708771572039417 -0.035026924163452405 -0.064419733632072249 -0.06092732837760189 0.023364535810334042 0.068723767475656228 -0.010814481443820212 -0.092627105268016957 0.059791420812017451 -0.0027257930963349077 -0.03310238735004007 0.047694654553158029 -0.077769747350913215 -0.072840744508540084 0.028568707780727565 0.00059459190294235478 -0.035450464047234551 0.019014778632459699 0.041777501926684335 0.0089110851022509169 86 | leaf_count=667 247 398 248 246 190 157 854 354 223 112 23 104 20 142 52 30 43 60 128 168 112 32 128 85 196 88 78 38 36 341 87 | internal_value=0 0.247111 0.461601 0.801135 -0.555152 0.0188508 0.98524 0.199058 -0.214866 0.565015 -0.388064 0.0556022 -0.291313 -0.529343 0.457455 -0.309174 0.913171 -0.914301 0.64701 0.15981 -0.826095 -0.303144 -0.979741 -0.776913 0.41656 -0.569222 -0.23828 -0.601691 -0.105129 0.0967787 88 | internal_count=5600 3957 2967 1689 1643 1278 1291 992 380 437 990 323 948 587 612 445 289 695 482 198 635 144 491 363 219 278 130 286 148 361 89 | shrinkage=0.05 90 | 91 | 92 | Tree=5 93 | num_leaves=31 94 | num_cat=0 95 | split_feature=25 25 26 26 25 22 9 5 5 25 9 0 24 0 22 22 4 10 23 24 14 3 24 25 10 24 26 15 
15 1 96 | split_gain=190.967 123.643 75.9346 34.8826 32.1702 30.5379 28.9557 28.4996 20.4006 23.2065 16.3872 14.5294 12.6395 12.3923 12.0457 15.7034 11.4611 12.1532 11.4754 10.8305 10.4034 10.2247 9.77361 9.75748 12.5245 9.34506 9.1128 8.67953 8.61195 8.55797 97 | threshold=1.0675000000000001 0.66550000000000009 0.77650000000000008 1.0825000000000002 1.5695000000000003 1.0265000000000002 1.1375000000000002 0.85250000000000015 0.88150000000000006 0.96550000000000014 1.6925000000000001 2.2790000000000004 0.97750000000000015 2.4065000000000007 0.81050000000000011 1.0885000000000002 -0.26349999999999996 -0.60149999999999981 1.1125000000000003 0.6705000000000001 1.2275000000000003 1.8335000000000004 2.1485000000000007 0.96950000000000014 -0.39949999999999991 0.74850000000000017 0.98850000000000016 0.24750000000000003 1.0415000000000003 1.1045000000000003 98 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 99 | left_child=1 6 7 8 5 12 14 20 9 29 13 -10 19 23 -1 16 17 -16 -18 -2 -3 22 26 -5 -25 -17 -6 -9 -7 -4 100 | right_child=4 2 3 10 21 28 -8 27 11 -11 -12 -13 -14 -15 15 25 18 -19 -20 -21 -22 -23 -24 24 -26 -27 -28 -29 -30 -31 101 | leaf_value=-0.0030615752742305952 0.0034703107205822795 -0.043631014576037838 0.039189339840581178 0.014774265448395028 -0.0018557887687825536 0.012316974642225694 0.017687564096601518 0.050194629936530677 0.052134220065784366 -0.010866723863100608 0.053746456132888858 -0.0095913829579568795 -0.017332207422173129 -0.066294385968032582 -0.053649680045280951 0.032692040483127113 -0.053259950735346007 -0.0076413436957386881 -9.6079161295919247e-05 -0.050577461676237591 0.01644601971111637 -0.088988727124621914 0.013100356705756284 -0.060969485605520762 0.0057211937142106865 -0.01879091794552833 -0.05166032479513516 -0.0048558551988264016 -0.027640113500363734 0.01009254748359883 102 | leaf_count=272 42 231 602 354 40 284 234 60 798 138 73 41 329 25 78 60 309 221 47 325 33 58 28 44 80 87 459 56 67 125 103 | internal_value=0 0.216732 0.468216 0.608941 -0.599264 -0.388867 -0.286679 -0.360292 0.755607 0.537959 0.181737 0.981772 -0.63183 0.0527939 -0.425642 -0.549283 -0.68199 -0.392767 -0.924946 -0.887641 -0.722441 -0.975834 -0.88845 0.125086 -0.359609 0.0435006 -0.953164 0.471924 0.0936351 0.683439 104 | internal_count=5600 3968 2660 2280 1632 1047 1308 380 1704 865 576 839 696 503 1074 802 655 299 356 367 264 585 527 478 124 147 499 116 351 727 105 | shrinkage=0.05 106 | 107 | 108 | Tree=6 109 | num_leaves=31 110 | num_cat=0 111 | split_feature=27 26 22 24 5 5 27 22 22 27 27 26 0 10 24 27 22 1 6 22 22 24 20 24 27 9 0 10 0 19 112 | split_gain=112.686 117.357 56.5122 59.0278 30.7779 26.4615 19.5527 18.104 17.3051 12.3645 12.2385 17.4938 11.7335 11.6296 11.5104 11.0966 13.7449 11.0212 11.4986 10.8982 11.2905 10.6987 10.5748 14.768 10.3464 10.1697 10.0578 9.70072 9.67154 9.01716 113 | threshold=0.90650000000000008 0.81150000000000011 1.0325000000000002 1.0935000000000004 0.88150000000000006 0.82050000000000012 1.0105000000000002 0.75750000000000017 1.0605000000000002 1.0885000000000002 0.82250000000000012 0.88850000000000018 1.5645000000000002 -1.0664999999999998 1.0475000000000001 0.99050000000000005 0.87450000000000017 1.7720000000000002 1.7735000000000003 0.70650000000000013 0.95150000000000012 0.73550000000000015 1.0000000180025095e-35 1.2565000000000002 0.72650000000000003 1.2125000000000001 1.6975000000000002 0.65350000000000008 1.1775000000000002 0.31150000000000005 114 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
2 2 115 | left_child=1 5 3 9 8 12 25 -5 10 21 13 -12 -1 29 -13 16 -9 18 -8 -6 22 -2 23 -21 -7 -4 -23 28 -26 -3 116 | right_child=2 4 6 7 19 24 17 15 -10 -11 11 14 -14 -15 -16 -17 -18 -19 -20 20 -22 26 -24 -25 27 -27 -28 -29 -30 -31 117 | leaf_value=-0.026892568273982145 -0.0018198074835837985 -0.049139701467658732 0.011803174586376282 0.0285833166038028 0.070372584328135215 0.021527319118560748 -0.0084518415853693414 0.040500661923382759 0.044452525144778793 -0.071079050871489899 -0.051427159681273815 -0.014110093022953716 0.01629471827039097 0.023981904242587733 0.031692113600085565 -0.026750228641287285 -0.017655985938328837 -0.063000598166036328 0.04230629182849218 0.006788641613653338 0.048012287280551444 -0.042508680384827385 0.047412398470806555 0.07803322289540672 -0.0051024750963037562 0.057311061946845036 -0.095086012578126558 0.026179293974610798 -0.066506156715099615 0.020603987183684228 118 | leaf_count=749 56 43 184 100 107 240 759 64 163 247 71 129 69 442 99 412 114 35 48 279 416 289 148 34 70 68 42 48 41 34 119 | internal_value=0 0.228708 -0.348017 -0.627004 0.53712 -0.259092 -0.0097917 -0.220541 0.284205 -1.0695 0.164877 -0.159769 -0.464984 0.353124 0.114191 -0.354136 0.0650035 -0.156679 -0.108757 0.791752 0.717427 -0.846001 0.498612 0.288197 0.166245 0.48193 -0.983491 -0.230945 -0.557426 -0.372065 120 | internal_count=5600 3182 2418 1324 1965 1217 1094 690 981 634 818 299 818 519 228 590 178 842 807 984 877 387 461 313 399 252 331 159 111 77 121 | shrinkage=0.05 122 | 123 | 124 | Tree=7 125 | num_leaves=31 126 | num_cat=0 127 | split_feature=25 25 26 27 25 9 5 26 25 24 0 5 24 27 25 3 1 13 10 5 3 13 27 3 4 4 5 3 24 14 128 | split_gain=169.33 108.548 65.9297 60.5817 27.6196 25.1516 24.2582 23.9892 26.9228 18.3185 17.1149 18.0553 15.8765 14.0891 13.8709 13.3075 12.1236 11.7248 11.4836 11.2864 10.9738 10.7819 10.7279 10.3987 11.2564 10.3869 9.80083 9.64208 9.577 9.40305 129 | threshold=1.0675000000000001 0.66550000000000009 0.77650000000000008 0.90650000000000008 1.5585000000000002 1.1375000000000002 0.85250000000000015 0.86350000000000016 0.89550000000000007 0.97750000000000015 1.6975000000000002 1.2635000000000003 0.94450000000000012 0.93850000000000011 0.96950000000000014 1.9805000000000004 0.5495000000000001 1.3135000000000003 -0.47449999999999992 0.88450000000000017 1.7525000000000002 0.9405 0.79950000000000021 0.34850000000000003 -0.56549999999999989 -0.13099999999999998 1.3365000000000002 1.8335000000000004 0.6755000000000001 1.2275000000000003 130 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 131 | left_child=1 5 6 7 9 20 29 8 22 13 11 14 -12 19 -5 -13 -14 18 -11 28 21 23 -4 24 -1 -24 -25 -6 -2 -3 132 | right_child=4 2 3 10 27 -7 -8 -9 -10 17 12 15 16 -15 -16 -17 -18 -19 -20 -21 -22 -23 25 26 -26 -27 -28 -29 -30 -31 133 | leaf_value=0.058279249497736801 0.023186751297265787 -0.040371287309234005 0.050883792970153677 0.015113831009609113 -0.041218658389127381 0.016607192668503433 0.021994432249266838 0.052529550218962932 -0.013156167059396942 0.0097867198262740086 -0.073979166268815427 0.053138057825276211 0.022809967003351726 -0.05424363551507503 -0.019995989396257314 -0.016616039035163285 -0.057112668477426055 0.019149606947993163 -0.027483268760758541 0.0024848841740828008 -0.05007105193334449 -0.00021978833304006642 -0.040202580922408963 -0.030471645743298467 -0.021400565852111707 0.043639884042823346 0.030380864132665965 -0.084790534118077465 -0.043273434223432126 0.016763322247873042 134 | leaf_count=31 25 231 234 459 545 234 116 901 
139 128 43 166 67 181 151 33 27 147 237 139 108 291 29 574 42 31 28 58 172 33 135 | internal_value=0 0.205756 0.442814 0.574823 -0.56431 -0.265809 -0.330094 0.858235 0.466921 -0.364634 0.187518 0.299026 -0.470626 -0.63153 0.127762 0.827668 -0.00490865 -0.0958748 -0.288294 -0.388983 -0.395298 -0.327493 0.818035 -0.466451 0.248553 0.06036 -0.553058 -0.906268 -0.697205 -0.664315 136 | internal_count=5600 3968 2660 2280 1632 1308 380 1334 433 1029 946 809 137 517 610 199 94 512 365 336 1074 966 294 675 73 60 602 603 197 264 137 | shrinkage=0.05 138 | 139 | 140 | Tree=8 141 | num_leaves=31 142 | num_cat=0 143 | split_feature=25 25 25 26 27 5 9 5 0 27 22 22 22 27 25 26 25 27 9 5 14 22 13 3 15 22 27 25 26 8 144 | split_gain=153.747 74.6785 62.9554 53.3031 48.4209 26.7661 22.7111 20.2448 20.1534 14.0381 14.3734 13.9208 16.103 16.0004 13.4998 12.3294 15.3413 13.8927 12.0543 11.43 10.8403 10.8267 11.9293 10.6424 10.4214 10.3729 10.0297 9.99064 9.8811 10.7027 145 | threshold=1.2450000000000003 0.66550000000000009 0.96550000000000014 0.77650000000000008 0.90250000000000019 0.87650000000000006 1.1375000000000002 0.80850000000000011 1.6975000000000002 0.91150000000000009 1.0215000000000003 0.71850000000000014 1.0445000000000002 1.0705000000000002 1.9405000000000003 0.87150000000000005 0.7955000000000001 0.8015000000000001 1.0415000000000003 1.8805000000000003 0.7360000000000001 0.69950000000000012 0.9405 0.34850000000000003 -1.4779999999999998 1.1035000000000001 0.89550000000000007 0.78850000000000009 0.85650000000000015 1.0000000180025095e-35 146 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 147 | left_child=1 6 3 7 15 18 21 20 19 28 -11 -2 13 24 -14 17 -17 -5 -4 -6 -3 -1 23 -23 -13 -25 -24 -19 29 -7 148 | right_child=11 2 5 4 8 9 -8 -9 -10 10 -12 12 14 -15 -16 16 -18 27 -20 -21 -22 22 26 25 -26 -27 -28 -29 -30 -31 149 | leaf_value=0.012288530788624094 0.024068018420805623 -0.042495991684968995 -0.031699583604026603 0.04227609406127486 0.016060699872508057 0.050742157044992658 0.015797939411350791 0.023336084066936356 -0.026825587643730393 -0.024191766445828797 0.018702016278240637 0.027778958345555649 -0.0072158239821684851 -0.065730680401898708 -0.047522718741986707 0.030669379657376147 0.064641592298539191 0.034839393857520072 0.0065463020192192637 0.069661647436787738 0.0080894106407500364 0.0089267893574030473 0.0079606632241629085 -0.038247232417906674 -0.037833702821507414 0.002275745388539007 -0.031349276129300843 -0.034790931573335911 0.042827659107599371 -0.040262816106403178 150 | leaf_count=102 40 160 351 322 576 21 234 114 111 160 156 26 265 255 123 199 499 38 109 44 58 66 196 540 418 72 98 47 165 35 151 | internal_value=0 0.149398 0.319459 0.515955 0.654136 -0.0979627 -0.25266 -0.223205 0.254663 0.209094 -0.0612112 -0.683145 -0.726092 -0.909178 -0.398452 0.929816 1.09835 0.648613 -0.452792 0.396546 -0.580203 -0.375685 -0.440957 -0.587179 -0.680015 -0.669632 -0.103189 -0.0750968 0.603433 -0.125406 152 | internal_count=5600 4473 3165 2168 1836 997 1308 332 731 537 316 1127 1087 699 388 1105 698 407 460 620 218 1074 972 678 444 612 294 85 221 56 153 | shrinkage=0.05 154 | 155 | 156 | Tree=9 157 | num_leaves=31 158 | num_cat=0 159 | split_feature=25 25 26 27 25 22 9 5 25 9 27 5 5 8 14 5 25 27 5 22 22 23 4 25 13 26 27 5 25 3 160 | split_gain=139.484 89.6879 56.1802 58.8554 23.4395 24.7608 20.5153 19.0123 16.3977 15.3931 12.4962 14.7831 12.3695 12.163 11.2701 11.1618 10.7229 17.1675 11.4517 10.2112 12.5602 10.0368 11.3407 9.88219 9.4946 9.44594 13.0539 16.2791 
9.17487 9.09626 161 | threshold=1.0675000000000001 0.66550000000000009 0.8055000000000001 0.90450000000000019 1.5585000000000002 1.0265000000000002 1.1375000000000002 0.85250000000000015 0.94550000000000012 1.2975000000000001 1.0025000000000002 0.98050000000000004 0.88150000000000006 1.6300000000000001 0.7360000000000001 0.63850000000000018 0.90950000000000009 1.0585000000000002 0.59450000000000014 0.81050000000000011 1.0885000000000002 1.1355000000000002 0.013500000000000002 0.98950000000000016 0.90850000000000009 0.89850000000000019 0.8015000000000001 0.44950000000000007 0.86450000000000016 1.8335000000000004 162 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 163 | left_child=1 6 7 12 5 10 19 8 14 13 11 -2 23 16 -3 -15 18 -18 -5 -1 21 22 -21 25 -23 26 27 -4 -20 -6 164 | right_child=4 2 3 9 29 -7 -8 -9 -10 -11 -12 -13 -14 15 -16 -17 17 -19 28 20 -22 24 -24 -25 -26 -27 -28 -29 -30 -31 165 | leaf_value=-0.0010723790862061031 -0.032065579686550126 -0.025817848130788558 -0.041706760072809143 0.05101366335179694 -0.037546866648071817 0.0057917137227283559 0.015029066991679822 0.019216551694225318 -0.073573436435715484 0.032167953525359685 -0.048250477157948071 0.0042246291995435663 0.053620629139660175 -0.036128219855601378 0.019153854461955454 0.023439751205089562 0.007818585977361691 -0.063999798430316848 -0.017197927927741483 -0.020886475178484278 0.0029597269639691642 -0.021174769179526431 -0.049082723913883491 -0.0037188796176172616 0.052422302980316859 0.052421343073495733 -0.0095669862153275036 0.047955591301407002 0.032057230856236921 -0.08044440319907023 166 | leaf_count=272 305 210 24 41 545 338 234 156 55 223 206 180 595 36 77 280 60 77 182 307 147 51 270 67 27 248 81 199 49 58 167 | internal_value=0 0.18794 0.405503 0.550009 -0.513113 -0.329303 -0.240248 -0.203493 -0.467674 0.177547 -0.547702 -0.372446 0.856269 0.035607 -0.274277 0.332745 -0.19379 -0.652625 0.0382857 -0.357194 -0.471231 -0.590177 -0.681546 0.65415 0.0853431 0.746856 0.505542 0.765334 -0.136097 -0.82998 168 | internal_count=5600 3968 2660 2162 1632 1029 1308 498 342 948 691 485 1214 725 287 316 409 137 272 1074 802 655 577 619 78 552 304 223 231 603 169 | shrinkage=0.05 170 | 171 | 172 | end of trees 173 | 174 | feature importances: 175 | feature_25=45 176 | feature_5=36 177 | feature_27=35 178 | feature_22=35 179 | feature_26=31 180 | feature_24=28 181 | feature_3=19 182 | feature_9=10 183 | feature_13=9 184 | feature_0=8 185 | feature_4=8 186 | feature_14=5 187 | feature_10=5 188 | feature_19=5 189 | feature_1=4 190 | feature_15=3 191 | feature_6=3 192 | feature_23=3 193 | feature_8=2 194 | feature_18=2 195 | feature_17=2 196 | feature_11=1 197 | feature_20=1 198 | 199 | parameters: 200 | [boosting: gbdt] 201 | [objective: binary] 202 | [metric: binary_logloss] 203 | [tree_learner: serial] 204 | [device_type: cpu] 205 | [data: ] 206 | [valid: ] 207 | [num_iterations: 100] 208 | [learning_rate: 0.05] 209 | [num_leaves: 31] 210 | [num_threads: 0] 211 | [max_depth: -1] 212 | [min_data_in_leaf: 20] 213 | [min_sum_hessian_in_leaf: 0.001] 214 | [bagging_fraction: 0.8] 215 | [bagging_freq: 5] 216 | [bagging_seed: 3] 217 | [feature_fraction: 0.9] 218 | [feature_fraction_seed: 2] 219 | [early_stopping_round: 0] 220 | [max_delta_step: 0] 221 | [lambda_l1: 0] 222 | [lambda_l2: 0] 223 | [min_gain_to_split: 0] 224 | [drop_rate: 0.1] 225 | [max_drop: 50] 226 | [skip_drop: 0.5] 227 | [xgboost_dart_mode: 0] 228 | [uniform_drop: 0] 229 | [drop_seed: 4] 230 | [top_rate: 0.2] 231 | [other_rate: 0.1] 232 
| [min_data_per_group: 100] 233 | [max_cat_threshold: 32] 234 | [cat_l2: 10] 235 | [cat_smooth: 10] 236 | [max_cat_to_onehot: 4] 237 | [top_k: 20] 238 | [monotone_constraints: ] 239 | [feature_contri: ] 240 | [forcedsplits_filename: ] 241 | [refit_decay_rate: 0.9] 242 | [verbosity: 0] 243 | [max_bin: 255] 244 | [min_data_in_bin: 3] 245 | [bin_construct_sample_cnt: 200000] 246 | [histogram_pool_size: -1] 247 | [data_random_seed: 1] 248 | [output_model: LightGBM_model.txt] 249 | [snapshot_freq: -1] 250 | [input_model: ] 251 | [output_result: LightGBM_predict_result.txt] 252 | [initscore_filename: ] 253 | [valid_data_initscores: ] 254 | [pre_partition: 0] 255 | [enable_bundle: 1] 256 | [max_conflict_rate: 0] 257 | [is_enable_sparse: 1] 258 | [sparse_threshold: 0.8] 259 | [use_missing: 1] 260 | [zero_as_missing: 0] 261 | [two_round: 0] 262 | [save_binary: 0] 263 | [enable_load_from_binary_file: 1] 264 | [header: 0] 265 | [label_column: ] 266 | [weight_column: ] 267 | [group_column: ] 268 | [ignore_column: ] 269 | [categorical_feature: ] 270 | [predict_raw_score: 0] 271 | [predict_leaf_index: 0] 272 | [predict_contrib: 0] 273 | [num_iteration_predict: -1] 274 | [pred_early_stop: 0] 275 | [pred_early_stop_freq: 10] 276 | [pred_early_stop_margin: 10] 277 | [convert_model_language: ] 278 | [convert_model: gbdt_prediction.cpp] 279 | [num_class: 1] 280 | [is_unbalance: 0] 281 | [scale_pos_weight: 1] 282 | [sigmoid: 1] 283 | [boost_from_average: 1] 284 | [reg_sqrt: 0] 285 | [alpha: 0.9] 286 | [fair_c: 1] 287 | [poisson_max_delta_step: 0.7] 288 | [tweedie_variance_power: 1.5] 289 | [max_position: 20] 290 | [label_gain: ] 291 | [metric_freq: 1] 292 | [is_provide_training_metric: 0] 293 | [eval_at: ] 294 | [num_machines: 1] 295 | [local_listen_port: 12400] 296 | [time_out: 120] 297 | [machine_list_filename: ] 298 | [machines: ] 299 | [gpu_platform_id: -1] 300 | [gpu_device_id: -1] 301 | [gpu_use_dp: 0] 302 | 303 | end of parameters 304 | 305 | pandas_categorical:null 306 | --------------------------------------------------------------------------------
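The model.txt file above is a LightGBM text dump: each Tree=N block lists parallel arrays (split_feature, threshold, left_child, right_child, leaf_value) that encode one decision tree, the leaf scores (shrinkage already applied) are summed across all ten trees, and, because the header records objective=binary sigmoid:1, that sum is passed through a sigmoid to produce a probability. A minimal sketch of reloading and querying such a dump follows; it is not part of the repository, it assumes the lightgbm Python package is installed, and the names booster and X are illustrative only.

# A minimal sketch, not part of the repository: reload the LightGBM
# text dump above and query it. Assumes the `lightgbm` package is
# installed; `booster` and `X` are illustrative names.
import numpy as np
import lightgbm as lgb

# Booster(model_file=...) parses the plain-text format shown above,
# from the "tree" header through "end of parameters".
booster = lgb.Booster(model_file='notebooks/model.txt')

print(booster.num_trees())    # 10 trees appear above (Tree=0 .. Tree=9)
print(booster.num_feature())  # 28, matching max_feature_idx=27

# importance_type='split' counts how often each feature is split on,
# which is what the "feature importances" block at the end reports
# (e.g. feature_25=45).
for name, count in zip(booster.feature_name(),
                       booster.feature_importance(importance_type='split')):
    if count > 0:
        print(name, count)

# With a binary objective, predict() returns sigmoid probabilities.
X = np.random.rand(5, booster.num_feature())
print(booster.predict(X))

Note that the dump contains only ten trees even though the parameters block records num_iterations: 100, so it appears to have been written from a short training run; predictions from it will differ from those of a fully trained model.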