├── pgportfolio ├── autotrain │ ├── __init__.py │ ├── generate.py │ └── training.py ├── learn │ ├── __init__.py │ ├── rollingtrainer.py │ ├── network.py │ ├── nnagent.py │ └── tradertrainer.py ├── tools │ ├── __init__.py │ ├── indicator.py │ ├── shortcut.py │ ├── trade.py │ ├── configprocess.py │ └── data.py ├── trade │ ├── __init__.py │ ├── backtest.py │ └── trader.py ├── resultprocess │ ├── __init__.py │ ├── table.py │ └── plot.py ├── __init__.py ├── tdagent │ ├── __init__.py │ ├── algorithms │ │ ├── __init__.py │ │ ├── ubah.py │ │ ├── sp.py │ │ ├── crp.py │ │ ├── rmr_deprecated.py │ │ ├── m0.py │ │ ├── best.py │ │ ├── wmamr.py │ │ ├── eg.py │ │ ├── rmr.py │ │ ├── olmar2.py │ │ ├── ons.py │ │ ├── bcrp.py │ │ ├── cwmr_var.py │ │ ├── up.py │ │ ├── cornu.py │ │ ├── pamr.py │ │ ├── bk_deprecated.py │ │ ├── olmar.py │ │ ├── cornk.py │ │ ├── corn_deprecated.py │ │ ├── cwmr_std.py │ │ ├── bnn.py │ │ ├── anticor1.py │ │ ├── bk.py │ │ ├── anticor2.py │ │ └── anticor_deprecated.py │ └── tdagent.py ├── marketdata │ ├── __init__.py │ ├── replaybuffer.py │ ├── poloniex.py │ ├── coinlist.py │ ├── datamatrices.py │ └── globaldatamatrix.py ├── constants.py └── net_config.json ├── requirements.txt ├── setup.py ├── .gitignore ├── README.md ├── main.py └── user_guide.md /pgportfolio/autotrain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pgportfolio/learn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pgportfolio/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pgportfolio/trade/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pgportfolio/resultprocess/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pgportfolio/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /pgportfolio/marketdata/__init__.py: -------------------------------------------------------------------------------- 1 | #from __future__ import absolute_import 2 | #from src import constants -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=1.0.0 2 | tflearn>=0.3.2 3 | pympler>=0.5 4 | cvxopt>=1.1.9 5 | seaborn>=0.8.1 6 | pandas>=0.20.3 7 | -------------------------------------------------------------------------------- /pgportfolio/resultprocess/table.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import 
numpy as np 3 | 4 | 5 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from pgportfolio.tdagent import * 3 | # -*- coding: utf-8 -*- 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md") as f: 4 | readme = f.read() 5 | 6 | setup( 7 | name="pgportfolio", 8 | version="1.0.0", 9 | description="", 10 | long_description=readme, 11 | author="Zhengyao Jiang, Dixing Xu, Jinjun Liang", 12 | author_email="", 13 | packages=find_packages(exclude=("tests", "docs"), 14 | include=("matplotlib", "tensorflow", "tflearn", "pandas", 15 | "pandas", "cvxopt", "scipy"))) -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/ubah.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | class UBAH(TDAgent): 5 | 6 | def __init__(self, b = None): 7 | super(UBAH, self).__init__() 8 | self.b = b 9 | 10 | def decide_by_history(self, x, last_b): 11 | '''return new portfolio vector 12 | :param x: input matrix 13 | :param last_b: last portfolio weight vector 14 | ''' 15 | if self.b is None: 16 | self.b = np.ones(12) / 12 17 | else: 18 | self.b = last_b 19 | return self.b 20 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/sp.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | class SP(TDAgent): 5 | '''Switch Portfolio''' 6 | def __init__(self, gamma=0.25, last_b=None): 7 | super(SP, self).__init__() 8 | self.gamma = gamma 9 | self.last_b = last_b 10 | 11 | def decide_by_history(self, x, last_b): 12 | self.record_history(x) 13 | nx = self.history[-1,:].ravel() 14 | if self.last_b is None: 15 | self.last_b = np.ones(nx.size) / nx.size 16 | b = self.last_b * (1-self.gamma-self.gamma/nx.size) + self.gamma/nx.size 17 | b = b / np.sum(b) 18 | self.last_b = b 19 | return b 20 | -------------------------------------------------------------------------------- /pgportfolio/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from os import path 4 | 5 | DATABASE_DIR = path.realpath(__file__).\ 6 | replace('pgportfolio/constants.pyc','/database/Data.db').\ 7 | replace("pgportfolio\\constants.pyc","database\\Data.db").\ 8 | replace('pgportfolio/constants.py','/database/Data.db').\ 9 | replace("pgportfolio\\constants.py","database\\Data.db") 10 | CONFIG_FILE_DIR = 'net_config.json' 11 | LAMBDA = 1e-4 # lambda in loss function 5 in training 12 | # About time 13 | NOW = 0 14 | FIVE_MINUTES = 60 * 5 15 | FIFTEEN_MINUTES = FIVE_MINUTES * 3 16 | HALF_HOUR = FIFTEEN_MINUTES * 2 17 | HOUR = HALF_HOUR * 2 18 | TWO_HOUR = HOUR * 2 19 | FOUR_HOUR = HOUR * 4 20 | DAY = HOUR * 24 21 | YEAR = DAY * 365 22 | # trading table name 23 | TABLE_NAME = 'test' 24 | 25 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/crp.py: 
-------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | 5 | class CRP(TDAgent): 6 | """ Constant rebalanced portfolio = use fixed weights all the time. Uniform weights are commonly used as a benchmark. 7 | 8 | Reference: 9 | T. Cover. Universal Portfolios, 1991. 10 | http://www-isl.stanford.edu/~cover/papers/paper93.pdf 11 | """ 12 | 13 | def __init__(self, b=None): 14 | """ 15 | :params b: Constant rebalanced portfolio weights. Default is uniform. 16 | """ 17 | super(CRP, self).__init__() 18 | self.b = b 19 | 20 | def decide_by_history(self, x, last_b): 21 | x = self.get_last_rpv(x) 22 | 23 | # init b to default if necessary 24 | if self.b is None: 25 | self.b = np.ones(len(x)) / len(x) 26 | return self.b 27 | 28 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/rmr_deprecated.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pgportfolio.tdagent.algorithms.olmar import OLMAR 4 | 5 | class RMR(OLMAR): 6 | '''universal-portfolio implementation''' 7 | def __init__(self, window=5, eps=10, tau=1e-3): 8 | super(RMR, self).__init__(window, eps) 9 | self.tau = tau 10 | 11 | def decide_by_history(self, x, last_b): 12 | self.record_history(x) 13 | close = pd.DataFrame(self.get_close()) 14 | nx = close.iloc[-1,:] 15 | #print close.shape 16 | y = close.mean() 17 | y_last = None 18 | while y_last is None or norm(y-y_last)/norm(y_last)>self.tau: 19 | y_last=y 20 | d=norm(close-y) 21 | y = close.div(d, axis=0).sum() / (1./d).sum() 22 | return y/nx 23 | 24 | def norm(x): 25 | if isinstance(x, pd.Series): 26 | axis=0 27 | else: 28 | axis=1 29 | return np.sqrt((x**2).sum(axis=axis)) 30 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/m0.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | 5 | class M0(TDAgent): 6 | """ Constant rebalanced portfolio = use fixed weights all the time. Uniform weights are commonly used as a benchmark. 7 | 8 | Reference: 9 | T. Cover. Universal Portfolios, 1991. 10 | http://www-isl.stanford.edu/~cover/papers/paper93.pdf 11 | """ 12 | 13 | def __init__(self, beta=0.5, C=None): 14 | """ 15 | :params b: Constant rebalanced portfolio weights. Default is uniform. 
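        Note: despite the class description above (copied from CRP), this implementation
        actually takes `beta` (a smoothing constant) and `C` (a per-asset count of how often
        that asset had the best one-period return so far); the returned weights are the
        smoothed frequencies (C + beta) / (m * beta + sum(C)).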
16 | """ 17 | super(M0, self).__init__() 18 | self.beta = beta 19 | self.C = C 20 | 21 | def decide_by_history(self, x, last_b): 22 | x = self.get_last_rpv(x) 23 | m = x.size 24 | if self.C is None: 25 | self.C = np.zeros((m,1)) 26 | b = (self.C + self.beta) / (m * self.beta + np.ones((1,m)).dot(self.C)) 27 | max_ind = np.argmax(x) 28 | self.C[max_ind] += 1 29 | 30 | return b.ravel() 31 | 32 | -------------------------------------------------------------------------------- /pgportfolio/net_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "layers": 3 | [ 4 | {"filter_shape": [1, 2], "filter_number": 3, "type": "ConvLayer"}, 5 | {"filter_number":10, "type": "EIIE_Dense", "regularizer": "L2", "weight_decay": 5e-9}, 6 | {"type": "EIIE_Output_WithW","regularizer": "L2", "weight_decay": 5e-8} 7 | ], 8 | "training":{ 9 | "steps":80000, 10 | "learning_rate":0.00028, 11 | "batch_size":109, 12 | "buffer_biased":5e-5, 13 | "snap_shot":false, 14 | "fast_train":true, 15 | "training_method":"Adam", 16 | "loss_function":"loss_function6" 17 | }, 18 | 19 | "input":{ 20 | "window_size":31, 21 | "coin_number":11, 22 | "global_period":1800, 23 | "feature_number":3, 24 | "test_portion":0.08, 25 | "online":false, 26 | "start_date":"2015/07/01", 27 | "end_date":"2017/07/01", 28 | "volume_average_days":30 29 | }, 30 | 31 | "trading":{ 32 | "trading_consumption":0.0025, 33 | "rolling_training_steps":85, 34 | "learning_rate":0.00028, 35 | "buffer_biased":5e-5 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/best.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | class BEST(TDAgent): 5 | '''Best Stock Strategy 6 | ''' 7 | def __init__(self, last_b=None): 8 | super(BEST, self).__init__() 9 | self.last_b = last_b 10 | 11 | 12 | def decide_by_history(self, data, last_b=None): 13 | if self.last_b is None: 14 | from pgportfolio.tools.trade import get_test_data 15 | from pgportfolio.tools.configprocess import preprocess_config 16 | import json 17 | with open("pgportfolio/net_config.json") as file: 18 | config = json.load(file) 19 | config = preprocess_config(config) 20 | data = get_test_data(config) 21 | data = data.T 22 | n, m = data.shape 23 | tmp_cumprod_ret = np.cumprod(data, axis=0) 24 | best_ind = np.argmax(tmp_cumprod_ret[-1,:]) 25 | self.last_b = np.zeros(m) 26 | self.last_b[best_ind] = 1 27 | return self.last_b.ravel() 28 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/wmamr.py: -------------------------------------------------------------------------------- 1 | from pgportfolio.tdagent.algorithms.pamr import PAMR 2 | import numpy as np 3 | 4 | 5 | class WMAMR(PAMR): 6 | """ Weighted Moving Average Passive Aggressive Algorithm for Online Portfolio Selection. 7 | It is just a combination of OLMAR and PAMR, where we use mean of past returns to predict 8 | next day's return. 9 | 10 | Reference: 11 | Li Gao, Weiguo Zhang 12 | Weighted Moving Averag Passive Aggressive Algorithm for Online Portfolio Selection, 2013. 13 | http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=6643896 14 | """ 15 | 16 | def __init__(self, window=5): 17 | """ 18 | :param w: Windows length for moving average. 
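        (The constructor argument is named `window`.) At each step the most recent `window`
        rows of price relatives are averaged, and that mean vector is fed into PAMR's
        passive-aggressive update to obtain the new weights.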
19 | """ 20 | super(WMAMR, self).__init__() 21 | 22 | if window < 1: 23 | raise ValueError('window parameter must be >=1') 24 | self.window = window 25 | 26 | 27 | def decide_by_history(self, x, last_b): 28 | self.record_history(x) 29 | xx = np.mean(self.history[-self.window:,], axis=0) 30 | # calculate return prediction 31 | b = self.update(last_b, xx, self.eps, self.C) 32 | 33 | return b 34 | 35 | -------------------------------------------------------------------------------- /pgportfolio/autotrain/generate.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import, division 2 | import json 3 | import os 4 | import logging 5 | from os import path 6 | 7 | 8 | def add_packages(config, repeat=1): 9 | train_dir = "train_package" 10 | package_dir = path.realpath(__file__).replace('pgportfolio/autotrain/generate.pyc',train_dir)\ 11 | .replace("pgportfolio\\autotrain\\generate.pyc", train_dir)\ 12 | .replace('pgportfolio/autotrain/generate.py',train_dir)\ 13 | .replace("pgportfolio\\autotrain\\generate.py", train_dir) 14 | all_subdir = [int(s) for s in os.listdir(package_dir) if os.path.isdir(package_dir+"/"+s)] 15 | if all_subdir: 16 | max_dir_num = max(all_subdir) 17 | else: 18 | max_dir_num = 0 19 | indexes = [] 20 | 21 | for i in range(repeat): 22 | max_dir_num += 1 23 | directory = package_dir+"/"+str(max_dir_num) 24 | config["random_seed"] = i 25 | os.makedirs(directory) 26 | indexes.append(max_dir_num) 27 | with open(directory + "/" + "net_config.json", 'w') as outfile: 28 | json.dump(config, outfile, indent=4, sort_keys=True) 29 | logging.info("create indexes %s" % indexes) 30 | return indexes 31 | 32 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/eg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from ..tdagent import TDAgent 3 | import numpy as np 4 | 5 | class EG(TDAgent): 6 | """ Exponentiated Gradient (EG) algorithm by Helmbold et al. 7 | 8 | Reference: 9 | Helmbold, David P., et al. 10 | "On‐Line Portfolio Selection Using Multiplicative Updates." 11 | Mathematical Finance 8.4 (1998): 325-347. 12 | http://www.cis.upenn.edu/~mkearns/finread/helmbold98line.pdf 13 | """ 14 | 15 | def __init__(self, eta=0.05, b=None, last_b=None): 16 | """ 17 | :params eta: Learning rate. Controls volatility of weights. 
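        Each period the weights are updated multiplicatively,
        b_new_i ∝ last_b_i * exp(eta * x_i / (x · last_b)), and then renormalised to sum to one,
        so a small eta keeps the portfolio close to its previous weights.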
18 | """ 19 | super(EG, self).__init__() 20 | self.eta = eta 21 | self.b = b 22 | self.last_b = last_b 23 | 24 | def init_pw(self, x): 25 | self.b = np.ones(x.size) 26 | 27 | def decide_by_history(self, x, last_b): 28 | self.record_history(x) 29 | x = self.history[-1,:].ravel() 30 | if self.last_b is None: 31 | self.last_b = np.ones(x.size) / x.size 32 | if self.b is None: 33 | self.init_pw(x) 34 | else: 35 | self.b = self.last_b * np.exp(self.eta * x.T / np.dot(x,last_b)) 36 | b = self.b / np.sum(self.b) 37 | self.last_b = b 38 | return b 39 | 40 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/rmr.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | class RMR(TDAgent): 5 | ''' Robust Median Reversion 6 | 7 | Reference: 8 | 9 | 10 | ''' 11 | def __init__(self, eps=5, W=5, b=None): 12 | ''' 13 | :param eps: the parameter control the reversion threshold 14 | :pram W: the length of window 15 | ''' 16 | super(RMR, self).__init__() 17 | self.eps = eps 18 | self.W = W 19 | self.b = b 20 | 21 | def decide_by_history(self, x, last_b): 22 | self.record_history(x) 23 | data_close = self.get_close() 24 | b = self.update(data_close, self.history, last_b, self.eps, self.W) 25 | return b 26 | 27 | def update(self, data_close, data, last_b, eps, W): 28 | t1 = data.shape[0] 29 | if t1 < W+2: 30 | x_t1 = data[t1-1, :] 31 | else: 32 | x_t1 = self.l1_median_VaZh(data_close[(t1-W):(t1-1),:]) / data_close[t1-1,:] 33 | 34 | if np.linalg.norm(x_t1 - x_t1.mean())**2 == 0: 35 | tao = 0 36 | else: 37 | tao = min(0, (x_t1.dot(last_b)-eps) / np.linalg.norm(x_t1 - x_t1.mean())**2) 38 | if self.b is None: 39 | self.b = np.ones(data.shape[1])/data.shape[1] 40 | else: 41 | self.b -= tao * (x_t1 - x_t1.mean() * np.ones(x_t1.shape)) 42 | self.b = self.euclidean_proj_simplex(self.b) 43 | return self.b 44 | -------------------------------------------------------------------------------- /pgportfolio/tools/indicator.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import numpy as np 3 | 4 | 5 | def max_drawdown(pc_array): 6 | """calculate the max drawdown with the portfolio changes 7 | @:param pc_array: all the portfolio changes during a trading process 8 | @:return: max drawdown 9 | """ 10 | portfolio_values = [] 11 | drawdown_list = [] 12 | max_benefit = 0 13 | for i in range(pc_array.shape[0]): 14 | if i > 0: 15 | portfolio_values.append(portfolio_values[i - 1] * pc_array[i]) 16 | else: 17 | portfolio_values.append(pc_array[i]) 18 | if portfolio_values[i] > max_benefit: 19 | max_benefit = portfolio_values[i] 20 | drawdown_list.append(0.0) 21 | else: 22 | drawdown_list.append(1.0 - portfolio_values[i] / max_benefit) 23 | return max(drawdown_list) 24 | 25 | 26 | def sharpe(pc_array): 27 | """calculate sharpe ratio with the portfolio changes 28 | @:param pc_array: all the portfolio changes during a trading process 29 | @:return: sharpe ratio 30 | """ 31 | pc_array = pc_array-1.0 32 | return np.mean(pc_array)/np.std(pc_array) 33 | 34 | 35 | def moving_accumulate(pc_array, n=48): 36 | acc = np.cumprod(pc_array) 37 | acc[n:] = acc[n:] / acc[:-n] 38 | return acc 39 | 40 | 41 | def positive_count(pc_array): 42 | return np.sum(pc_array>1) 43 | 44 | 45 | def negative_count(pc_array): 46 | return np.sum(pc_array<1) 47 | 
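A minimal usage sketch for these helpers (illustrative only: the per-period portfolio changes below are made-up numbers, each entry being portfolio_value_t / portfolio_value_{t-1}):

import numpy as np
from pgportfolio.tools.indicator import sharpe, max_drawdown, positive_count, negative_count

# hypothetical per-period portfolio changes produced by a backtest
example_pc = np.array([1.01, 0.99, 1.02, 0.98, 1.03])

print("final value:", np.prod(example_pc))             # cumulative product of the changes
print("sharpe ratio:", sharpe(example_pc))             # mean/std of the per-period returns
print("max drawdown:", max_drawdown(example_pc))       # worst peak-to-trough loss
print("winning periods:", positive_count(example_pc))  # periods with change > 1
print("losing periods:", negative_count(example_pc))   # periods with change < 1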
-------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/olmar2.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | class OLMAR2(TDAgent): 5 | '''Moving average reversion strategy for on-line portfolio selection 6 | 7 | Reference: 8 | Bin Li, Steven C.H. Hoi, Doyen Sahoo, Zhi-Yong Liu 9 | 10 | ''' 11 | 12 | def __init__(self, eps=10, alpha=0.5, data_phi=None, b=None): 13 | '''init 14 | :param eps: mean reversion threshold 15 | :param alpha: trade off parameter for moving average 16 | ''' 17 | super(OLMAR2, self).__init__() 18 | self.eps = eps 19 | self.alpha = alpha 20 | self.data_phi = data_phi 21 | self.b = b 22 | 23 | 24 | def decide_by_history(self, x, last_b): 25 | self.record_history(x) 26 | nx = self.get_last_rpv(x) 27 | 28 | if self.b is None: 29 | self.b = np.ones(nx.size) / nx.size 30 | last_b = self.b 31 | if self.data_phi is None: 32 | self.data_phi = np.ones((1,nx.size)) 33 | else: 34 | self.data_phi = self.alpha + (1-self.alpha)*self.data_phi/nx 35 | 36 | ell = max(0, self.eps - self.data_phi.dot(last_b)) 37 | 38 | x_bar = self.data_phi.mean() 39 | denominator = np.linalg.norm(self.data_phi - x_bar)**2 40 | 41 | if denominator == 0: 42 | lam = 0 43 | else: 44 | lam = ell / denominator 45 | 46 | self.data_phi = np.squeeze(self.data_phi) 47 | b = last_b + lam * (self.data_phi - x_bar) 48 | 49 | b = self.euclidean_proj_simplex(b) 50 | self.b = b 51 | return self.b 52 | -------------------------------------------------------------------------------- /pgportfolio/tools/shortcut.py: -------------------------------------------------------------------------------- 1 | from __future__ import division,absolute_import,print_function 2 | from pgportfolio.trade.backtest import BackTest 3 | from pgportfolio.tdagent.algorithms import crp, ons, olmar, up, anticor1, pamr,\ 4 | best, bk, cwmr_std, eg, sp, ubah, wmamr, bcrp, cornk, m0, rmr 5 | 6 | # the dictionary of name of algorithms mapping to the constructor of tdagents 7 | ALGOS = {"crp": crp.CRP, "ons": ons.ONS, "olmar": olmar.OLMAR, "up": up.UP, 8 | "anticor": anticor1.ANTICOR1, "pamr": pamr.PAMR, 9 | "best": best.BEST, "bk": bk.BK, "bcrp": bcrp.BCRP, 10 | "corn": cornk.CORNK, "m0": m0.M0, "rmr": rmr.RMR, 11 | "cwmr": cwmr_std.CWMR_STD, "eg": eg.EG, "sp": sp.SP, "ubah": ubah.UBAH, 12 | "wmamr": wmamr.WMAMR} 13 | 14 | def execute_backtest(algo, config): 15 | """ 16 | @:param algo: string representing the name the name of algorithms 17 | @:return: numpy array of portfolio changes 18 | """ 19 | agent, agent_type, net_dir = _construct_agent(algo) 20 | backtester = BackTest(config, agent=agent, agent_type=agent_type, net_dir=net_dir) 21 | backtester.start_trading() 22 | return backtester.test_pc_vector 23 | 24 | 25 | def _construct_agent(algo): 26 | if algo.isdigit(): 27 | agent = None 28 | agent_type = "nn" 29 | net_dir = "./train_package/" + algo + "/netfile" 30 | elif algo in ALGOS: 31 | agent = ALGOS[algo]() 32 | agent_type = "traditional" 33 | net_dir = None 34 | else: 35 | message = "The algorithm name "+algo+" is not support. 
Supported algos " \ 36 | "are " + str(list(ALGOS.keys())) 37 | raise LookupError(message) 38 | return agent, agent_type, net_dir -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # database 104 | database/ 105 | train_package/ 106 | 107 | .DS_Store 108 | .idea/ 109 | *.db 110 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/ons.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | from cvxopt import solvers, matrix 4 | solvers.options['show_progress'] = False 5 | 6 | class ONS(TDAgent): 7 | """ 8 | Online newton step algorithm. 9 | 10 | Reference: 11 | A.Agarwal, E.Hazan, S.Kale, R.E.Schapire. 12 | Algorithms for Portfolio Management based on the Newton Method, 2006. 13 | http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_AgarwalHKS06.pdf 14 | """ 15 | def __init__(self, delta=0.125, beta=1., eta=0., A = None): 16 | """ 17 | :param delta, beta, eta: Model parameters. See paper. 
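        In this implementation A accumulates the outer products of the gradients
        grad = x / (b · x), the vector b accumulates (1 + 1/beta) * grad, and each step the
        new portfolio is the projection of delta * A^{-1} * b onto the simplex (in the norm
        induced by A), mixed with the uniform portfolio with weight eta.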
18 | """ 19 | super(ONS, self).__init__() 20 | self.delta = delta 21 | self.beta = beta 22 | self.eta = eta 23 | self.A = A 24 | 25 | def init_portfolio(self, X): 26 | m = X.size 27 | self.A = np.mat(np.eye(m)) 28 | self.b = np.mat(np.zeros(m)).T 29 | 30 | 31 | def decide_by_history(self, x, last_b): 32 | ''' 33 | :param x: input matrix 34 | :param last_b: last portfolio 35 | ''' 36 | x = self.get_last_rpv(x) 37 | if self.A is None: 38 | self.init_portfolio(x) 39 | 40 | # calculate gradient 41 | grad = np.mat(x / np.dot(last_b, x)).T 42 | # update A 43 | self.A += grad * grad.T 44 | # update b 45 | self.b += (1 + 1./self.beta) * grad 46 | 47 | # projection of p induced by norm A 48 | pp = self.projection_in_norm(self.delta * self.A.I * self.b, self.A) 49 | return pp * (1 - self.eta) + np.ones(len(x)) / float(len(x)) * self.eta 50 | 51 | def projection_in_norm(self, x, M): 52 | """ Projection of x to simplex indiced by matrix M. Uses quadratic programming. 53 | """ 54 | m = M.shape[0] 55 | 56 | P = matrix(2*M) 57 | q = matrix(-2 * M * x) 58 | G = matrix(-np.eye(m)) 59 | h = matrix(np.zeros((m,1))) 60 | A = matrix(np.ones((1,m))) 61 | b = matrix(1.) 62 | 63 | sol = solvers.qp(P, q, G, h, A, b) 64 | return np.squeeze(sol['x']) 65 | 66 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/bcrp.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | from pgportfolio.tdagent.algorithms.crp import CRP 3 | import numpy as np 4 | from scipy.optimize import minimize 5 | 6 | class BCRP(CRP): 7 | """ Best Constant Rebalanced Portfolio = Constant Rebalanced Portfolio constructed with hindsight. It is often used as benchmark. 8 | 9 | Reference: 10 | T. Cover. Universal Portfolios, 1991. 11 | http://www-isl.stanford.edu/~cover/papers/paper93.pdf 12 | """ 13 | 14 | def __init__(self, last_b=None): 15 | super(BCRP, self).__init__() 16 | self.last_b = last_b 17 | 18 | def get_weight(self, data): 19 | """ Find weights which maximize return on X in hindsight! 
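        The optimization is delegated to `opt_weights` below, which maximizes the product of
        period returns prod(X·b) over the simplex using SLSQP.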
""" 20 | weights = opt_weights(data) 21 | return weights 22 | 23 | def decide_by_history(self, x, last_b): 24 | if self.last_b is None: 25 | from pgportfolio.tools.trade import get_test_data 26 | from pgportfolio.tools.configprocess import preprocess_config 27 | import json 28 | with open("pgportfolio/net_config.json") as file: 29 | config = json.load(file) 30 | config = preprocess_config(config) 31 | data = get_test_data(config) 32 | self.last_b = self.get_weight(data.T) 33 | 34 | return self.last_b 35 | 36 | 37 | def opt_weights(X, max_leverage=1): 38 | x_0 = max_leverage * np.ones(X.shape[1]) / float(X.shape[1]) 39 | objective = lambda b: -np.prod(X.dot(b)) 40 | cons = ({'type': 'eq', 'fun': lambda b: max_leverage-np.sum(b)},) 41 | bnds = [(0., max_leverage)]*len(x_0) 42 | res = minimize(objective, x_0, bounds=bnds, constraints=cons, method='slsqp', options={'ftol': 1e-07}) 43 | return res.x 44 | 45 | 46 | if __name__ == '__main__': 47 | from pgportfolio.tools.backtest import get_test_data 48 | from pgportfolio.tools.configprocess import preprocess_config 49 | import json 50 | with open("pgportfolio/net_config.json") as file: 51 | config = json.load(file) 52 | config = preprocess_config(config) 53 | data = get_test_data(config) 54 | bcrp = BCRP() 55 | result = bcrp.get_weight(data.T) 56 | 57 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/cwmr_var.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import scipy.stats 4 | from numpy.linalg import inv 5 | from numpy import diag, sqrt, log, trace 6 | 7 | class CWMR_VAR(TDAgent): 8 | """ First variant of a CWMR outlined in original article. It is 9 | only approximation to the posted problem. """ 10 | def __init__(self, eps=-0.5, confidence=0.95, sigma=None): 11 | """ 12 | :param eps: Mean reversion threshold (expected return on current day must be lower 13 | than this threshold). Recommended value is -0.5. 14 | :param confidence: Confidence parameter for profitable mean reversion portfolio. Recommended value is 0.95. 15 | """ 16 | super(CWMR_VAR, self).__init__() 17 | # input check 18 | if not (0 <= confidence <= 1): 19 | raise ValueError('confidence must be from interval [0,1]') 20 | 21 | self.eps = eps 22 | self.theta = scipy.stats.norm.ppf(confidence) 23 | self.sigma = sigma 24 | 25 | def init_portfolio(self, X): 26 | m = X.shape[1] 27 | self.sigma = np.matrix(np.eye(m) / m**2) 28 | 29 | 30 | def decide_by_history(self, x, last_b): 31 | x = self.get_last_rpv(x) 32 | x = np.reshape(x, (1,x.size)) 33 | last_b = np.reshape(last_b, (1,last_b.size)) 34 | if self.sigma is None: 35 | self.init_portfolio(x) 36 | # initialize 37 | m = len(x) 38 | mu = np.matrix(last_b).T 39 | sigma = self.sigma 40 | theta = self.theta 41 | eps = self.eps 42 | x = np.matrix(x).T # matrices are easier to manipulate 43 | 44 | # 4. Calculate the following variables 45 | M = (mu.T * x).mean() 46 | V = x.T * sigma * x 47 | x_upper = sum(diag(sigma) * x) / trace(sigma) 48 | 49 | # 5. Update the portfolio distribution 50 | mu, sigma = self.update(x, x_upper, mu, sigma, M, V, theta, eps) 51 | 52 | # 6. 
Normalize mu and sigma 53 | mu = self.simplex_proj(mu) 54 | sigma = sigma / (m**2 * trace(sigma)) 55 | """ 56 | sigma(sigma < 1e-4*eye(m)) = 1e-4; 57 | """ 58 | self.sigma = sigma 59 | 60 | return np.ravel(mu) 61 | 62 | -------------------------------------------------------------------------------- /pgportfolio/marketdata/replaybuffer.py: -------------------------------------------------------------------------------- 1 | from __future__ import division,absolute_import,print_function 2 | import numpy as np 3 | import logging 4 | 5 | 6 | class ReplayBuffer: 7 | def __init__(self, start_index, end_index, batch_size, is_permed, coin_number, sample_bias=1.0): 8 | """ 9 | :param start_index: start index of the training set on the global data matrices 10 | :param end_index: end index of the training set on the global data matrices 11 | """ 12 | self.__coin_number = coin_number 13 | self.__experiences = [Experience(i) for i in range(start_index, end_index)] 14 | self.__is_permed = is_permed 15 | # NOTE: in order to achieve the previous w feature 16 | self.__batch_size = batch_size 17 | self.__sample_bias = sample_bias 18 | logging.debug("buffer_bias is %f" % sample_bias) 19 | 20 | def append_experience(self, state_index): 21 | self.__experiences.append(Experience(state_index)) 22 | logging.debug("a new experience, indexed by %d, was appended" % state_index) 23 | 24 | def __sample(self, start, end, bias): 25 | """ 26 | @:param end: is excluded 27 | @:param bias: value in (0, 1) 28 | """ 29 | # TODO: deal with the case when bias is 0 30 | ran = np.random.geometric(bias) 31 | while ran > end - start: 32 | ran = np.random.geometric(bias) 33 | result = end - ran 34 | return result 35 | 36 | def next_experience_batch(self): 37 | # First get a start point randomly 38 | batch = [] 39 | if self.__is_permed: 40 | for i in range(self.__batch_size): 41 | batch.append(self.__experiences[self.__sample(self.__experiences[0].state_index, 42 | self.__experiences[-1].state_index, 43 | self.__sample_bias)]) 44 | else: 45 | batch_start = self.__sample(0, len(self.__experiences) - self.__batch_size, 46 | self.__sample_bias) 47 | batch = self.__experiences[batch_start:batch_start+self.__batch_size] 48 | return batch 49 | 50 | 51 | class Experience: 52 | def __init__(self, state_index): 53 | self.state_index = int(state_index) 54 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/up.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | class UP(TDAgent): 5 | """ Universal Portfolio by Thomas Cover enhanced for "leverage" (instead of just 6 | taking weights from a simplex, leverage allows us to stretch simplex to 7 | contain negative positions). 8 | 9 | Reference: 10 | T. Cover. Universal Portfolios, 1991. 11 | http://www-isl.stanford.edu/~cover/papers/paper93.pdf 12 | """ 13 | def __init__(self, eval_points=10000, leverage=1., W=None): 14 | """ 15 | :param eval_points: Number of evaluated points (approximately). Complexity of the 16 | algorithm is O(time * eval_points * nr_assets**2) because of matrix multiplication. 17 | :param leverage: Maximum leverage used. leverage == 1 corresponds to simplex, 18 | leverage == 1/nr_stocks to uniform CRP. leverage > 1 allows negative weights 19 | in portfolio. 
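        Internally, W holds the sampled constant-rebalanced portfolios and S their accumulated
        wealth; each call to decide_by_history updates S with the latest price relatives and
        returns the wealth-weighted average of the sampled portfolios, normalised to sum to one.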
20 | """ 21 | super(UP, self).__init__() 22 | self.eval_points = eval_points 23 | self.leverage = leverage 24 | self.W = W 25 | 26 | def init_portfolio(self, X): 27 | """ Create a mesh on simplex and keep wealth of all strategies. """ 28 | m = X.shape[1] 29 | # create set of CRPs 30 | self.W = np.matrix(mc_simplex(m - 1, self.eval_points)) 31 | self.S = np.matrix(np.ones(self.W.shape[0])).T 32 | 33 | # stretch simplex based on leverage (simple calculation yields this) 34 | leverage = max(self.leverage, 1./m) 35 | stretch = (leverage - 1./m) / (1. - 1./m) 36 | self.W = (self.W - 1./m) * stretch + 1./m 37 | 38 | 39 | def decide_by_history(self, x, last_b): 40 | # calculate new wealth of all CRPs 41 | x = self.get_last_rpv(x) 42 | x = np.reshape(x, (1,x.size)) 43 | 44 | if self.W is None: 45 | self.init_portfolio(x) 46 | 47 | self.S = np.multiply(self.S, self.W * np.matrix(x).T) 48 | b = self.W.T * self.S 49 | pv = b / np.sum(b) 50 | pvn = np.ravel(pv) 51 | return pvn #squeeze not working there 52 | 53 | 54 | 55 | def mc_simplex(d, points): 56 | '''Sample random points from a simplex with dimension d 57 | :param d: Number of dimensions 58 | :param points: Total number of points. 59 | ''' 60 | a = np.sort(np.random.random((points,d))) 61 | a = np.hstack([np.zeros((points,1)), a, np.ones((points,1))]) 62 | return np.diff(a) 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /pgportfolio/learn/rollingtrainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from pgportfolio.learn.tradertrainer import TraderTrainer 5 | import logging 6 | import tflearn 7 | 8 | 9 | class RollingTrainer(TraderTrainer): 10 | def __init__(self, config, restore_dir=None, save_path=None, agent=None, device="cpu"): 11 | config["training"]["buffer_biased"] = config["trading"]["buffer_biased"] 12 | config["training"]["learning_rate"] = config["trading"]["learning_rate"] 13 | TraderTrainer.__init__(self, config, restore_dir=restore_dir, save_path=save_path, 14 | agent=agent, device=device) 15 | 16 | @property 17 | def agent(self): 18 | return self._agent 19 | 20 | @property 21 | def coin_list(self): 22 | return self._matrix.coin_list 23 | 24 | @property 25 | def data_matrices(self): 26 | return self._matrix 27 | 28 | @property 29 | def rolling_training_steps(self): 30 | return self.config["trading"]["rolling_training_steps"] 31 | 32 | def __rolling_logging(self): 33 | fast_train = self.train_config["fast_train"] 34 | if not fast_train: 35 | tflearn.is_training(False, self._agent.session) 36 | 37 | v_pv, v_log_mean = self._evaluate("validation", 38 | self._agent.portfolio_value, 39 | self._agent.log_mean) 40 | t_pv, t_log_mean = self._evaluate("test", self._agent.portfolio_value, self._agent.log_mean) 41 | loss_value = self._evaluate("training", self._agent.loss) 42 | 43 | logging.info('training loss is %s\n' % loss_value) 44 | logging.info('the portfolio value on validation asset is %s\nlog_mean is %s\n' % 45 | (v_pv,v_log_mean)) 46 | logging.info('the portfolio value on test asset is %s\n mean is %s' % (t_pv,t_log_mean)) 47 | 48 | def decide_by_history(self, history, last_w): 49 | result = self._agent.decide_by_history(history, last_w) 50 | return result 51 | 52 | def rolling_train(self, online_w=None): 53 | steps = self.rolling_training_steps 54 | if steps > 0: 55 | self._matrix.append_experience(online_w) 56 | for 
i in range(steps): 57 | x, y, last_w, w = self.next_batch() 58 | self._agent.train(x, y, last_w, w) 59 | self.__rolling_logging() 60 | -------------------------------------------------------------------------------- /pgportfolio/marketdata/poloniex.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import sys 4 | from datetime import datetime 5 | 6 | if sys.version_info[0] == 3: 7 | from urllib.request import Request, urlopen 8 | from urllib.parse import urlencode 9 | else: 10 | from urllib2 import Request, urlopen 11 | from urllib import urlencode 12 | 13 | minute = 60 14 | hour = minute*60 15 | day = hour*24 16 | week = day*7 17 | month = day*30 18 | year = day*365 19 | 20 | # Possible Commands 21 | PUBLIC_COMMANDS = ['returnTicker', 'return24hVolume', 'returnOrderBook', 'returnTradeHistory', 'returnChartData', 'returnCurrencies', 'returnLoanOrders'] 22 | 23 | class Poloniex: 24 | def __init__(self, APIKey='', Secret=''): 25 | self.APIKey = APIKey.encode() 26 | self.Secret = Secret.encode() 27 | # Conversions 28 | self.timestamp_str = lambda timestamp=time.time(), format="%Y-%m-%d %H:%M:%S": datetime.fromtimestamp(timestamp).strftime(format) 29 | self.str_timestamp = lambda datestr=self.timestamp_str(), format="%Y-%m-%d %H:%M:%S": int(time.mktime(time.strptime(datestr, format))) 30 | self.float_roundPercent = lambda floatN, decimalP=2: str(round(float(floatN) * 100, decimalP))+"%" 31 | 32 | # PUBLIC COMMANDS 33 | self.marketTicker = lambda x=0: self.api('returnTicker') 34 | self.marketVolume = lambda x=0: self.api('return24hVolume') 35 | self.marketStatus = lambda x=0: self.api('returnCurrencies') 36 | self.marketLoans = lambda coin: self.api('returnLoanOrders',{'currency':coin}) 37 | self.marketOrders = lambda pair='all', depth=10:\ 38 | self.api('returnOrderBook', {'currencyPair':pair, 'depth':depth}) 39 | self.marketChart = lambda pair, period=day, start=time.time()-(week*1), end=time.time(): self.api('returnChartData', {'currencyPair':pair, 'period':period, 'start':start, 'end':end}) 40 | self.marketTradeHist = lambda pair: self.api('returnTradeHistory',{'currencyPair':pair}) # NEEDS TO BE FIXED ON Poloniex 41 | 42 | ##################### 43 | # Main Api Function # 44 | ##################### 45 | def api(self, command, args={}): 46 | """ 47 | returns 'False' if invalid command or if no APIKey or Secret is specified (if command is "private") 48 | returns {"error":""} if API error 49 | """ 50 | if command in PUBLIC_COMMANDS: 51 | url = 'https://poloniex.com/public?' 
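            # public commands need no authentication: the command name and any extra
            # arguments are URL-encoded onto the query string of a GET request, and the
            # JSON response body is decoded and returned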
52 | args['command'] = command 53 | ret = urlopen(Request(url + urlencode(args))) 54 | return json.loads(ret.read().decode(encoding='UTF-8')) 55 | else: 56 | return False 57 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/cornu.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import logging 4 | from scipy.optimize import minimize 5 | 6 | class CORNU(TDAgent): 7 | ''' 8 | Correlation driven non parametric Uniform 9 | ''' 10 | def __init__(self, K=5, L=1, c=0.1, exp_w=None): 11 | ''' 12 | :param K: maximum window size 13 | :param L: splits into L parts, in each K 14 | ''' 15 | super(CORNU, self).__init__() 16 | self.K = K 17 | self.L = L 18 | self.c = c 19 | self.exp_ret = np.ones((K,L)) 20 | self.exp_w = exp_w 21 | 22 | 23 | def decide_by_history(self, X, last_b): 24 | self.record_history(X) 25 | 26 | n, m = self.history.shape 27 | 28 | if self.exp_w is None: 29 | self.exp_w = np.ones((self.K*self.L, m)) / m 30 | 31 | for k in np.arange(self.K): 32 | for l in np.arange(self.L): 33 | self.exp_w[(k-1)*self.L+l,:] = self.update(self.history, k+1, self.c) 34 | 35 | 36 | numerator = 0 37 | denominator = 0 38 | 39 | p = 1./(self.K*self.L) 40 | 41 | for k in np.arange(self.K): 42 | for l in np.arange(self.L): 43 | numerator += p * self.exp_ret[k,l] * self.exp_w[(k-1)*self.L+l,:] 44 | denominator += p * self.exp_ret[k,l] 45 | 46 | b = np.divide(numerator.T , denominator) 47 | 48 | self.exp_ret[:,0] *= np.dot(self.history[-1,:], self.exp_w.T) 49 | 50 | return b 51 | 52 | def update(self, data, w, c): 53 | ''' 54 | :param w: window sze 55 | :param c: correlation coefficient threshold 56 | ''' 57 | T, N = data.shape 58 | m = -1 59 | histdata = np.zeros((T,N)) 60 | 61 | if T <= w+1: 62 | return np.ones(N) / N 63 | 64 | if w==0: 65 | histdata = data[:T,:] 66 | m = T 67 | else: 68 | for i in np.arange(w, T): 69 | d1 = data[i-w:i,:].ravel() 70 | d2 = data[T-w:T,:].ravel() 71 | 72 | datacorr = np.corrcoef(d1,d2)[1,0] 73 | 74 | 75 | if datacorr >= c: 76 | m += 1 77 | histdata[m,:] = data[i,:] #minus one to avoid out of bounds issue 78 | 79 | if m==-1: 80 | return np.ones(N) / N 81 | 82 | b = opt(histdata[:m+1,:]) 83 | return b 84 | 85 | def opt(X): 86 | x_0 = np.ones(X.shape[1]) / X.shape[1] 87 | objective = lambda b: -np.prod(X.dot(b)) 88 | cons = ({'type': 'eq', 'fun': lambda b: 1-np.sum(b)},) 89 | bnds = [(0,1)]*len(x_0) 90 | res = minimize(objective, x0=x_0, bounds=bnds, constraints=cons, method='slsqp', ) 91 | return res.x 92 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/pamr.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | 5 | class PAMR(TDAgent): 6 | """ Passive aggressive mean reversion strategy for portfolio selection. 7 | There are three variants with different parameters, see original article 8 | for details. 9 | 10 | Reference: 11 | B. Li, P. Zhao, S. C.H. Hoi, and V. Gopalkrishnan. 12 | Pamr: Passive aggressive mean reversion strategy for portfolio selection, 2012. 13 | http://www.cais.ntu.edu.sg/~chhoi/paper_pdf/PAMR_ML_final.pdf 14 | """ 15 | def __init__(self, eps=0.5, C=500, variant=2, b=None): 16 | """ 17 | :param eps: Control parameter for variant 0. Must be >=0, recommended value is 18 | between 0.5 and 1. 19 | :param C: Control parameter for variant 1 and 2. 
Recommended value is 500. 20 | :param variant: Variants 0, 1, 2 are available. 21 | """ 22 | super(PAMR, self).__init__() 23 | 24 | # input check 25 | if not(eps >= 0): 26 | raise ValueError('epsilon parameter must be >=0') 27 | 28 | if variant == 0: 29 | if eps is None: 30 | raise ValueError('eps parameter is required for variant 0') 31 | elif variant == 1 or variant == 2: 32 | if C is None: 33 | raise ValueError('C parameter is required for variant 1,2') 34 | else: 35 | raise ValueError('variant is a number from 0,1,2') 36 | 37 | self.eps = eps 38 | self.C = C 39 | self.variant = variant 40 | self.b = b 41 | 42 | def decide_by_history(self, x, last_b): 43 | x = self.get_last_rpv(x) 44 | # calculate return prediction 45 | if self.b is None: 46 | self.b = np.ones(x.size) / x.size 47 | last_b = self.b 48 | b = self.update(last_b, x, self.eps, self.C) 49 | b = b.ravel() 50 | self.b = b 51 | return self.b 52 | 53 | 54 | def update(self, b, x, eps, C): 55 | """ Update portfolio weights to satisfy constraint b * x <= eps 56 | and minimize distance to previous weights. """ 57 | x_mean = np.mean(x) 58 | 59 | le = np.maximum(0., np.dot(b, x) - eps) 60 | 61 | denominator = np.square(np.linalg.norm(x-x_mean)) 62 | 63 | if self.variant == 0: 64 | tau = le / denominator 65 | elif self.variant == 1: 66 | tau = np.minimum(C, le / denominator) 67 | elif self.variant == 2: 68 | tau = le / (denominator + 0.5 / C) 69 | 70 | # limit lambda to avoid numerical problems 71 | tau = np.minimum(100000, tau) 72 | 73 | # update portfolio 74 | b = b - tau * (x - x_mean) 75 | 76 | # project it onto simplex 77 | return self.simplex_proj(b) 78 | 79 | 80 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/bk_deprecated.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from ..tdagent import TDAgent 3 | import numpy as np 4 | import pandas as pd 5 | from scipy.optimize import minimize 6 | import logging 7 | 8 | 9 | class BK(TDAgent): 10 | """ Kernel based strategy. It tries to find similar sequences of price in history and then maximize objective function (that is profit) on the days following them. 11 | 12 | Reference: 13 | L. Gyorfi, G. Lugosi, and F. Udina. Nonparametric kernel based sequential 14 | investment strategies. Mathematical Finance 16 (2006) 337–357. 15 | """ 16 | def __init__(self, k=5, l=10): 17 | """ 18 | :param k: Sequence length. 19 | :param l: Number of nearest neighbors. 20 | """ 21 | 22 | super(BK, self).__init__() 23 | self.k = k 24 | self.l = l 25 | 26 | def decide_by_history(self, x, last_b): 27 | self.record_history(x) 28 | history = pd.DataFrame(self.history) 29 | # find indices of nearest neighbors throughout history 30 | ixs = self.find_nn(history, self.k, self.l) 31 | 32 | # get returns from the days following NNs 33 | J = history.iloc[[history.index.get_loc(i) + 1 for i in ixs]] 34 | 35 | # get best weights 36 | return opt_weights(J) 37 | 38 | 39 | def find_nn(self, H, k, l): 40 | """ Note that nearest neighbors are calculated in a different (more efficient) way than shown 41 | in the article. 
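        Concretely, the squared Euclidean distance between the most recent window of length k
        and the window ending at every earlier index is computed with vectorised pandas shifts,
        and the indices of the l smallest distances are returned.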
42 | 43 | param H: history 44 | """ 45 | # calculate distance from current sequence to every other point 46 | D = H * 0 47 | for i in range(1, k+1): 48 | D += (H.shift(i-1) - H.iloc[-i])**2 49 | D = D.sum(1).iloc[:-1] 50 | 51 | # sort and find nearest neighbors 52 | D.sort_values(inplace=True) 53 | return D.index[:l] 54 | 55 | 56 | def opt_weights(X, max_leverage=1): 57 | x_0 = max_leverage * np.ones(X.shape[1]) / float(X.shape[1]) 58 | objective = lambda b: -np.sum(np.log(np.maximum(np.dot(X-1, b)+1,1e-4))) 59 | cons = ({'type': 'eq', 'fun': lambda b: max_leverage-sum(b)},) 60 | bnds = [(0., max_leverage)]*len(x_0) 61 | while True: 62 | res = minimize(objective, x_0, bounds=bnds, constraints=cons, method='slsqp') 63 | eps = 1e-7 64 | if (res.x < 0-eps).any() or (res.x > max_leverage+eps).any(): 65 | X = X + np.random.randn(1)[0] * 1e-5 66 | logging.debug('Optimal weights not found, trying again') 67 | continue 68 | elif res.success: 69 | break 70 | else: 71 | if np.isnan(res.x).any(): 72 | logging.warning('Solution not found') 73 | res.x = np.ones(X.shape[1]) / X.shape[1] 74 | else: 75 | logging.warning("converged but not successfully") 76 | break 77 | 78 | return res.x 79 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/olmar.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | 4 | class OLMAR(TDAgent): 5 | """ On-Line Portfolio Selection with Moving Average Reversion 6 | 7 | Reference: 8 | B. Li and S. C. H. Hoi. 9 | On-line portfolio selection with moving average reversion, 2012. 10 | http://icml.cc/2012/papers/168.pdf 11 | """ 12 | 13 | def __init__(self, window=5, eps=10, cum_ret=1, count=0, b=None): 14 | """ 15 | :param window: Lookback window. 16 | :param eps(epsilon): Constraint on return for new weights on last price (average of prices). 17 | x * w >= eps for new weights w. 18 | """ 19 | super(OLMAR, self).__init__() 20 | # input check 21 | if window < 2: 22 | raise ValueError('window parameter must be >=3') 23 | if eps < 1: 24 | raise ValueError('epsilon parameter must be >=1') 25 | 26 | self.window = window 27 | self.eps = eps 28 | self.b = b 29 | 30 | #debugging parameters 31 | #self.cum_ret=cum_ret 32 | #self.count = count 33 | #self.last_b = last_b 34 | 35 | def decide_by_history(self, x, last_b): 36 | self.record_history(x) 37 | nx = self.get_last_rpv(x) 38 | #if self.last_b is None: 39 | # self.last_b = np.ones(12)/12 40 | #if self.history.shape[0] < self.window: 41 | # return np.ones(nx.size) /nx.size 42 | #predict next price relative vector 43 | if self.b is None: 44 | self.b = np.ones(nx.size) / nx.size 45 | last_b = self.b 46 | if self.history.shape[0] < self.window + 1: 47 | data_phi=self.history[self.history.shape[0]-1,:] 48 | else: 49 | data_phi = np.zeros((1,nx.size)) 50 | tmp_x = np.ones((1,nx.size)) 51 | temp = 1. 52 | for i in range(self.window): 53 | data_phi += temp 54 | tmp_x = np.multiply(tmp_x, self.history[-i-1,:]) 55 | temp = 1. 
/ tmp_x 56 | data_phi = data_phi * (1./self.window) 57 | data_phi = np.squeeze(data_phi) 58 | #update portfolio 59 | b = self.update(last_b, data_phi, self.eps) 60 | #self.last_b = b 61 | #self.cum_ret *= np.dot(last_b, nx) 62 | #self.count += 1 63 | #print 'period %d, total return is %f' % (self.count, self.cum_ret) 64 | b = b.ravel() 65 | self.b = b 66 | return self.b 67 | 68 | 69 | def update(self, b, x, eps): 70 | """ Update portfolio weights to satisfy constraint b * x >= eps 71 | and minimize distance to previous weights. """ 72 | x_mean = x.mean() 73 | ell = max(0, eps - b.dot(x)) 74 | denominator = np.linalg.norm(x-x_mean)**2 75 | if denominator == 0: 76 | #zero valatility 77 | lam = 0 78 | else: 79 | lam = ell / denominator 80 | # update portfolio 81 | b = b + lam * (x - x_mean) 82 | # project it onto simplex 83 | return self.euclidean_proj_simplex(b) 84 | 85 | 86 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/cornk.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import logging 4 | from scipy.optimize import minimize 5 | 6 | class CORNK(TDAgent): 7 | ''' 8 | Correlation driven non parametric Uniform 9 | ''' 10 | def __init__(self, K=5, L=10, pc=0.1, exp_w=None): 11 | ''' 12 | :param K: maximum window size 13 | :param L: splits into L parts, in each K 14 | ''' 15 | super(CORNK, self).__init__() 16 | self.K = K 17 | self.L = L 18 | self.pc = pc 19 | self.exp_ret = np.ones((K,L)) 20 | self.exp_w = exp_w 21 | 22 | 23 | def decide_by_history(self, X, last_b): 24 | self.record_history(X) 25 | 26 | n, m = self.history.shape 27 | 28 | if self.exp_w is None: 29 | self.exp_w = np.ones((self.K*self.L, m)) / m 30 | 31 | for k in np.arange(self.K): 32 | for l in np.arange(self.L): 33 | rho = l / self.L 34 | self.exp_w[(k-1)*self.L+l,:] = self.update(self.history, k+1, rho) 35 | 36 | nc = np.ceil(self.pc*self.K*self.L) 37 | exp_ret_vec = self.exp_ret.ravel() 38 | exp_ret_sort = np.sort(exp_ret_vec, kind='heapsort') 39 | ret_rho = exp_ret_sort[int(self.K*self.L-nc+1)] 40 | 41 | numerator = 0 42 | denominator = 0 43 | 44 | p = 1./(self.K*self.L) 45 | 46 | for k in np.arange(self.K): 47 | for l in np.arange(self.L): 48 | p = 1 if self.exp_ret[k,l] >= ret_rho else 0 49 | numerator += p * self.exp_ret[k,l] * self.exp_w[(k-1)*self.L+l,:] 50 | denominator += p * self.exp_ret[k,l] 51 | 52 | b = np.divide(numerator.T , denominator) 53 | 54 | for k in range(self.K): 55 | for l in range(self.L): 56 | self.exp_ret[k,l] *= np.dot(self.history[-1,:], self.exp_w[(k-1)*self.L+l,:].T) 57 | 58 | return b 59 | 60 | def update(self, data, w, c): 61 | ''' 62 | :param w: window sze 63 | :param c: correlation coefficient threshold 64 | ''' 65 | T, N = data.shape 66 | m = -1 67 | histdata = np.zeros((T,N)) 68 | 69 | if T <= w+1: 70 | return np.ones(N) / N 71 | 72 | if w==0: 73 | histdata = data[:T,:] 74 | m = T 75 | else: 76 | for i in np.arange(w, T): 77 | d1 = data[i-w:i,:].ravel() 78 | d2 = data[T-w:T,:].ravel() 79 | 80 | datacorr = np.corrcoef(d1,d2)[1,0] 81 | 82 | 83 | if datacorr >= c: 84 | m += 1 85 | histdata[m,:] = data[i,:] #minus one to avoid out of bounds issue 86 | 87 | if m==-1: 88 | return np.ones(N) / N 89 | 90 | b = opt(histdata[:m+1,:]) 91 | return b 92 | 93 | def opt(X): 94 | x_0 = np.ones(X.shape[1]) / X.shape[1] 95 | objective = lambda b: -np.prod(X.dot(b)) 96 | cons = ({'type': 'eq', 'fun': lambda b: 1-np.sum(b)},) 97 | bnds = 
[(0,1)]*len(x_0) 98 | res = minimize(objective, x0=x_0, bounds=bnds, constraints=cons, method='slsqp', ) 99 | return res.x 100 | -------------------------------------------------------------------------------- /pgportfolio/trade/backtest.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | from pgportfolio.trade import trader 4 | from pgportfolio.marketdata.datamatrices import DataMatrices 5 | import logging 6 | from pgportfolio.tools.trade import calculate_pv_after_commission 7 | 8 | 9 | class BackTest(trader.Trader): 10 | def __init__(self, config, net_dir=None, agent=None, agent_type="nn"): 11 | trader.Trader.__init__(self, 0, config, 0, net_dir, 12 | initial_BTC=1, agent=agent, agent_type=agent_type) 13 | if agent_type == "nn": 14 | data_matrices = self._rolling_trainer.data_matrices 15 | elif agent_type == "traditional": 16 | config["input"]["feature_number"] = 1 17 | data_matrices = DataMatrices.create_from_config(config) 18 | else: 19 | raise ValueError() 20 | self.__test_set = data_matrices.get_test_set() 21 | self.__test_length = self.__test_set["X"].shape[0] 22 | self._total_steps = self.__test_length 23 | self.__test_pv = 1.0 24 | self.__test_pc_vector = [] 25 | 26 | @property 27 | def test_pv(self): 28 | return self.__test_pv 29 | 30 | @property 31 | def test_pc_vector(self): 32 | return np.array(self.__test_pc_vector, dtype=np.float32) 33 | 34 | def finish_trading(self): 35 | self.__test_pv = self._total_capital 36 | 37 | """ 38 | fig, ax = plt.subplots() 39 | ax.bar(np.arange(len(self._rolling_trainer.data_matrices.sample_count)), 40 | self._rolling_trainer.data_matrices.sample_count) 41 | fig.tight_layout() 42 | plt.show() 43 | """ 44 | 45 | def _log_trading_info(self, time, omega): 46 | pass 47 | 48 | def _initialize_data_base(self): 49 | pass 50 | 51 | def _write_into_database(self): 52 | pass 53 | 54 | def __get_matrix_X(self): 55 | return self.__test_set["X"][self._steps] 56 | 57 | def __get_matrix_y(self): 58 | return self.__test_set["y"][self._steps, 0, :] 59 | 60 | def rolling_train(self, online_sample=None): 61 | self._rolling_trainer.rolling_train() 62 | 63 | def generate_history_matrix(self): 64 | inputs = self.__get_matrix_X() 65 | if self._agent_type == "traditional": 66 | inputs = np.concatenate([np.ones([1, 1, inputs.shape[2]]), inputs], axis=1) 67 | inputs = inputs[:, :, 1:] / inputs[:, :, :-1] 68 | return inputs 69 | 70 | def trade_by_strategy(self, omega): 71 | logging.info("the step is {}".format(self._steps)) 72 | logging.debug("the raw omega is {}".format(omega)) 73 | future_price = np.concatenate((np.ones(1), self.__get_matrix_y())) 74 | pv_after_commission = calculate_pv_after_commission(omega, self._last_omega, self._commission_rate) 75 | portfolio_change = pv_after_commission * np.dot(omega, future_price) 76 | self._total_capital *= portfolio_change 77 | self._last_omega = pv_after_commission * omega * \ 78 | future_price /\ 79 | portfolio_change 80 | logging.debug("the portfolio change this period is : {}".format(portfolio_change)) 81 | self.__test_pc_vector.append(portfolio_change) 82 | 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is the original implementation of our paper, A Deep Reinforcement Learning Framework for the Financial Portfolio Management Problem 
([arXiv:1706.10059](https://arxiv.org/abs/1706.10059)), together with a toolkit for portfolio management research. 2 | 3 | * The deep reinforcement learning framework is the core part of the library. 4 | The method is basically policy gradient on immediate reward. 5 | One can configure the topology, training method, or input data in a separate json file. The training process will be recorded, and users can visualize it using TensorBoard. 6 | Result summaries and parallel training are supported for better hyper-parameter optimization. 7 | * The financial-model-based portfolio management algorithms are also embedded in this library for comparison purposes; their implementation is based on Li and Hoi's toolkit [OLPS](https://github.com/OLPS/OLPS). 8 | 9 | ## Differences from the article version 10 | Note that this library is part of our main project and is several versions ahead of the article. 11 | 12 | * In this version, some technical bugs are fixed and improvements in hyper-parameter tuning and engineering are made. 13 | * The most important bug in the arXiv v2 article is that the test time-span mentioned is about 30% shorter than the actual experiment. Thus the volume-observation interval (for asset selection) overlapped with the backtest data in the paper. 14 | * With new hyper-parameters, users can train the models within smaller time durations (less than 30 minutes). 15 | * All updates will be incorporated into future versions of the paper. 16 | * The original versioning history and internal discussions, including some in-code comments, are removed in this open-sourced edition. These contain our unimplemented ideas, some of which will very likely become the foundations of our future publications. 17 | 18 | ## Platform Support 19 | Python 3.5+ on Windows and Python 2.7+/3.5+ on Linux are supported. 20 | 21 | ## Dependencies 22 | Install dependencies via `pip install -r requirements.txt`: 23 | 24 | * tensorflow (>= 1.0.0) 25 | * tflearn 26 | * pandas 27 | * ... 28 | 29 | ## User Guide 30 | Please check out the [User Guide](user_guide.md). 31 | 32 | ## Acknowledgement 33 | This project would not have been finished without using code from the following open-source projects: 34 | * [Online Portfolio Selection toolbox](https://github.com/OLPS/OLPS) 35 | 36 | ## Community Contribution 37 | We welcome contributions from the community, including but not limited to: 38 | * Bug fixing 39 | * Interfacing with other markets such as stocks, futures, and options 40 | * Adding broker APIs (under `marketdata`) 41 | * More backtest strategies (under `tdagent`) 42 | 43 | ## Risk Disclaimer (for Live Trading) 44 | 45 | There is always risk of loss in trading. **All trading strategies are used at your own risk.** 46 | 47 | *The volumes of many cryptocurrency markets are still low. Market impact and slippage may badly affect the results during live trading.* 48 | 49 | ## Donation 50 | If you have made some profits because of this project, or you just love reading our code, please consider making a small donation to our ongoing projects via the following BTC or ETH addresses. All donations will be used as student stipends.
51 | 52 | * BTC: [1PEHK1nVi8x4HQM1A67anyfYzdDFoqUo21](https://blockchain.info/address/1PEHK1nVi8x4HQM1A67anyfYzdDFoqUo21) 53 | * ETH: [0xa8197289e16C0cCad0a4838719ce11C9A920cfB7](https://etherscan.io/address/0xa8197289e16C0cCad0a4838719ce11C9A920cfB7) 54 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/corn_deprecated.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | from scipy.stats.stats import pearsonr 4 | from scipy.optimize import minimize 5 | import logging 6 | 7 | class CORN(TDAgent): 8 | """ 9 | Correlation-driven nonparametric learning approach. Similar to anticor but instead 10 | of distance of return vectors they use correlation. 11 | In appendix of the article, universal property is proven. 12 | 13 | Two versions are available. Fast which provides around 2x speedup, but uses more memory 14 | (linear in window) and slow version which is memory efficient. Most efficient would 15 | be to rewrite it in sweave or numba. 16 | 17 | Reference: 18 | B. Li, S. C. H. Hoi, and V. Gopalkrishnan. 19 | Corn: correlation-driven nonparametric learning approach for portfolio selection, 2011. 20 | http://www.cais.ntu.edu.sg/~chhoi/paper_pdf/TIST-CORN.pdf 21 | """ 22 | def __init__(self, w=5, rho=0.1): 23 | """ 24 | :param w: Window parameter. 25 | :param rho: Correlation coefficient threshold. Recommended is 0. 26 | """ 27 | # input check 28 | if not(-1 <= rho <= 1): 29 | raise ValueError('rho must be between -1 and 1') 30 | if not(w >= 2): 31 | raise ValueError('window must be greater than 2') 32 | super(CORN, self).__init__() 33 | self.w = w 34 | self.rho = rho 35 | 36 | 37 | def decide_by_history(self, x, last_b): 38 | self.record_history(x) 39 | x = self.get_last_rpv(x) 40 | 41 | T, N = self.history.shape 42 | m = 0 43 | histdata = np.zeros((T,N)) 44 | 45 | if T <= self.w+1: 46 | '''use uniform portfolio weight vector''' 47 | return np.ones(x.size) / x.size 48 | 49 | if self.w==0: 50 | histdata = self.history 51 | m = T 52 | else: 53 | for i in np.arange(self.w+1, T+1): 54 | d1 = self.history[i-self.w:i-1,:] 55 | d2 = self.history[T-self.w+1:T,:] 56 | 57 | datacorr = np.corrcoef(d1,d2)[0,1] 58 | 59 | if datacorr >= self.rho: 60 | m += 1 61 | histdata[m,:] = self.history[i-1,:] #minus one to avoid out of bounds issue 62 | 63 | if m==0: 64 | return np.ones(x.size) / x.size 65 | 66 | #sqp according to OLPS implementation 67 | x_0 = np.ones(x.size) / x.size 68 | objective = lambda b: -np.prod(np.dot(histdata, b)) 69 | cons = ({'type': 'eq', 'fun': lambda b: 1-np.sum(b, axis=0)},) 70 | bnds = [(0.,1)]*x.size 71 | while True: 72 | res = minimize(objective, x_0, bounds=bnds, constraints=cons, method='slsqp') 73 | eps = 1e-7 74 | if (res.x < 0-eps).any() or (res.x > 1+eps).any(): 75 | x = x + np.random.randn(1)[0] * 1e-5 76 | logging.debug('Optimal portfolio weight vector not found, trying again...') 77 | continue 78 | elif res.success: 79 | break 80 | else: 81 | if np.isnan(res.x).any(): 82 | logging.warning('Solution does not exist, use uniform pwv') 83 | res.x = np.ones(x.size) / x.size 84 | else: 85 | logging.warning('Converged but not successfully.') 86 | break 87 | 88 | return res.x 89 | -------------------------------------------------------------------------------- /pgportfolio/autotrain/training.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from 
__future__ import division
3 | from __future__ import print_function
4 | 
5 | import logging
6 | import os
7 | import time
8 | from multiprocessing import Process
9 | from pgportfolio.learn.tradertrainer import TraderTrainer
10 | from pgportfolio.tools.configprocess import load_config
11 | 
12 | 
13 | def train_one(save_path, config, log_file_dir, index, logfile_level, console_level, device):
14 | """
15 | train an agent
16 | :param save_path: the path to save the tensorflow model (.ckpt), could be None
17 | :param config: the json configuration file
18 | :param log_file_dir: the directory to save the tensorboard logging file, could be None
19 | :param index: identifier of this training, which is also the sub-directory name in train_package;
20 | if it is 0, nothing would be saved into the summary file
21 | :param logfile_level: logging level of the file
22 | :param console_level: logging level of the console
23 | :param device: device used for training, "cpu" or "gpu"
24 | :return : the Result namedtuple
25 | """
26 | if log_file_dir:
27 | logging.basicConfig(filename=log_file_dir.replace("tensorboard","programlog"),
28 | level=logfile_level)
29 | console = logging.StreamHandler()
30 | console.setLevel(console_level)
31 | logging.getLogger().addHandler(console)
32 | print("training at %s started" % index)
33 | return TraderTrainer(config, save_path=save_path, device=device).train_net(log_file_dir=log_file_dir, index=index)
34 | 
35 | def train_all(processes=1, device="cpu"):
36 | """
37 | train all the agents in the train_package folders
38 | 
39 | :param processes: the number of processes. If equal to 1, the logging level is debug
40 | at file and info at console. If greater than 1, the logging level is
41 | info at file and warning at console.
42 | """ 43 | if processes == 1: 44 | console_level = logging.INFO 45 | logfile_level = logging.DEBUG 46 | else: 47 | console_level = logging.WARNING 48 | logfile_level = logging.INFO 49 | train_dir = "train_package" 50 | if not os.path.exists("./" + train_dir): #if the directory does not exist, creates one 51 | os.makedirs("./" + train_dir) 52 | all_subdir = os.listdir("./" + train_dir) 53 | all_subdir.sort() 54 | pool = [] 55 | for dir in all_subdir: 56 | # train only if the log dir does not exist 57 | if not str.isdigit(dir): 58 | return 59 | # NOTE: logfile is for compatibility reason 60 | if not (os.path.isdir("./"+train_dir+"/"+dir+"/tensorboard") or os.path.isdir("./"+train_dir+"/"+dir+"/logfile")): 61 | p = Process(target=train_one, args=( 62 | "./" + train_dir + "/" + dir + "/netfile", 63 | load_config(dir), 64 | "./" + train_dir + "/" + dir + "/tensorboard", 65 | dir, logfile_level, console_level, device)) 66 | p.start() 67 | pool.append(p) 68 | else: 69 | continue 70 | 71 | # suspend if the processes are too many 72 | wait = True 73 | while wait: 74 | time.sleep(5) 75 | for p in pool: 76 | alive = p.is_alive() 77 | if not alive: 78 | pool.remove(p) 79 | if len(pool) 1e-10: 48 | mu0 = mu1 49 | mu1 = (1 - commission_rate * w0[0] - 50 | (2 * commission_rate - commission_rate ** 2) * 51 | np.sum(np.maximum(w0[1:] - mu1*w1[1:], 0))) / \ 52 | (1 - commission_rate * w1[0]) 53 | return mu1 54 | 55 | 56 | def get_test_data(config): 57 | """ 58 | :return : a 2d numpy array with shape(coin_number, periods), 59 | each element the relative price 60 | """ 61 | config["input"]["feature_number"] = 1 62 | config["input"]["norm_method"] = "relative" 63 | config["input"]["global_period"] = config["input"]["global_period"] 64 | price_matrix = DataMatrices.create_from_config(config) 65 | test_set = price_matrix.get_test_set()["y"][:, 0, :].T 66 | test_set = np.concatenate((np.ones((1, test_set.shape[1])), test_set), axis=0) 67 | return test_set 68 | 69 | 70 | def asset_vector_to_dict(coin_list, vector, with_BTC=True): 71 | vector = np.squeeze(vector) 72 | dict_coin = {} 73 | if with_BTC: 74 | dict_coin['BTC'] = vector[0] 75 | for i, name in enumerate(coin_list): 76 | if vector[i+1] > 0: 77 | dict_coin[name] = vector[i + 1] 78 | return dict_coin 79 | 80 | 81 | def save_test_data(config, file_name="test_data", output_format="csv"): 82 | if output_format == "csv": 83 | matrix = get_test_data(config) 84 | with open(file_name+"."+output_format, 'wb') as f: 85 | np.savetxt(f, matrix.T, delimiter=",") 86 | 87 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/cwmr_std.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import scipy.stats 4 | from numpy.linalg import inv 5 | from numpy import diag, sqrt, log, trace 6 | 7 | 8 | class CWMR_STD(TDAgent): 9 | """ Confidence weighted mean reversion. 10 | 11 | Reference: 12 | B. Li, S. C. H. Hoi, P.L. Zhao, and V. Gopalkrishnan. 13 | Confidence weighted mean reversion strategy for online portfolio selection, 2013. 14 | http://jmlr.org/proceedings/papers/v15/li11b/li11b.pdf 15 | """ 16 | def __init__(self, eps=-0.5, confidence=0.95, sigma=None): 17 | """ 18 | :param eps: Mean reversion threshold (expected return on current day must be lower 19 | than this threshold). Recommended value is -0.5. 20 | :param confidence: Confidence parameter for profitable mean reversion portfolio. Recommended value is 0.95. 
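:param sigma: initial covariance matrix of the portfolio distribution; if None, it is initialized to eye(m)/m**2 by init_portfolio on the first call.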
21 | """ 22 | super(CWMR_STD, self).__init__() 23 | # input check 24 | if not (0 <= confidence <= 1): 25 | raise ValueError('confidence must be from interval [0,1]') 26 | 27 | self.eps = eps 28 | self.theta = scipy.stats.norm.ppf(confidence) 29 | self.sigma = sigma 30 | 31 | def init_portfolio(self, X): 32 | m = X.shape[1] 33 | self.sigma = np.matrix(np.eye(m) / m**2) 34 | 35 | 36 | def decide_by_history(self, x, last_b): 37 | x = self.get_last_rpv(x) 38 | x = np.reshape(x, (1,x.size)) 39 | last_b = np.reshape(last_b, (1,last_b.size)) 40 | if self.sigma is None: 41 | self.init_portfolio(x) 42 | # initialize 43 | m = len(x) 44 | mu = np.matrix(last_b).T 45 | sigma = self.sigma 46 | theta = self.theta 47 | eps = self.eps 48 | x = np.matrix(x).T # matrices are easier to manipulate 49 | 50 | # 4. Calculate the following variables 51 | M = (mu.T * x).mean() 52 | V = x.T * sigma * x 53 | x_upper = sum(diag(sigma) * x) / trace(sigma) 54 | 55 | # 5. Update the portfolio distribution 56 | mu, sigma = self.update(x, x_upper, mu, sigma, M, V, theta, eps) 57 | 58 | # 6. Normalize mu and sigma 59 | mu = self.simplex_proj(mu) 60 | sigma = sigma / (m**2 * trace(sigma)) 61 | """ 62 | sigma(sigma < 1e-4*eye(m)) = 1e-4; 63 | """ 64 | self.sigma = sigma 65 | 66 | return np.ravel(mu) 67 | 68 | def update(self, x, x_upper, mu, sigma, M, V, theta, eps): 69 | # lambda from equation 7 70 | foo = (V - x_upper * x.T * np.sum(sigma, axis=1)) / M**2 + V * theta**2 / 2. 71 | a = foo**2 - V**2 * theta**4 / 4 72 | b = 2 * (eps - np.log(M)) * foo 73 | c = (eps - np.log(M))**2 - V * theta**2 74 | 75 | a,b,c = a[0,0], b[0,0], c[0,0] 76 | 77 | lam = np.amax([0, 78 | (-b + sqrt(b**2 - 4 * a * c)) / (2. * a), 79 | (-b - sqrt(b**2 - 4 * a * c)) / (2. * a)]) 80 | # bound it due to numerical problems 81 | lam = np.minimum(lam, 1E+7) 82 | 83 | # update mu and sigma 84 | U_sqroot = 0.5 * (-lam * theta * V + sqrt(lam**2 * theta**2 * V**2 + 4*V)) 85 | mu = mu - lam * sigma * (x - x_upper) / M 86 | sigma = inv(inv(sigma) + theta * lam / U_sqroot * diag(x)**2) 87 | """ 88 | tmp_sigma = inv(inv(sigma) + theta*lam/U_sqroot*diag(xt)^2); 89 | % Don't update sigma if results are badly scaled. 90 | if all(~isnan(tmp_sigma(:)) & ~isinf(tmp_sigma(:))) 91 | sigma = tmp_sigma; 92 | end 93 | """ 94 | return mu, sigma 95 | 96 | -------------------------------------------------------------------------------- /pgportfolio/marketdata/coinlist.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | from pgportfolio.marketdata.poloniex import Poloniex 5 | from pgportfolio.tools.data import get_chart_until_success 6 | import pandas as pd 7 | from datetime import datetime 8 | import logging 9 | from pgportfolio.constants import * 10 | 11 | 12 | class CoinList(object): 13 | def __init__(self, end, volume_average_days=1, volume_forward=0): 14 | self._polo = Poloniex() 15 | # connect the internet to accees volumes 16 | vol = self._polo.marketVolume() 17 | ticker = self._polo.marketTicker() 18 | pairs = [] 19 | coins = [] 20 | volumes = [] 21 | prices = [] 22 | 23 | logging.info("select coin online from %s to %s" % (datetime.fromtimestamp(end-(DAY*volume_average_days)- 24 | volume_forward). 25 | strftime('%Y-%m-%d %H:%M'), 26 | datetime.fromtimestamp(end-volume_forward). 
27 | strftime('%Y-%m-%d %H:%M'))) 28 | for k, v in vol.items(): 29 | if k.startswith("BTC_") or k.endswith("_BTC"): 30 | pairs.append(k) 31 | for c, val in v.items(): 32 | if c != 'BTC': 33 | if k.endswith('_BTC'): 34 | coins.append('reversed_' + c) 35 | prices.append(1.0 / float(ticker[k]['last'])) 36 | else: 37 | coins.append(c) 38 | prices.append(float(ticker[k]['last'])) 39 | else: 40 | volumes.append(self.__get_total_volume(pair=k, global_end=end, 41 | days=volume_average_days, 42 | forward=volume_forward)) 43 | self._df = pd.DataFrame({'coin': coins, 'pair': pairs, 'volume': volumes, 'price':prices}) 44 | self._df = self._df.set_index('coin') 45 | 46 | @property 47 | def allActiveCoins(self): 48 | return self._df 49 | 50 | @property 51 | def allCoins(self): 52 | return self._polo.marketStatus().keys() 53 | 54 | @property 55 | def polo(self): 56 | return self._polo 57 | 58 | def get_chart_until_success(self, pair, start, period, end): 59 | return get_chart_until_success(self._polo, pair, start, period, end) 60 | 61 | # get several days volume 62 | def __get_total_volume(self, pair, global_end, days, forward): 63 | start = global_end-(DAY*days)-forward 64 | end = global_end-forward 65 | chart = self.get_chart_until_success(pair=pair, period=DAY, start=start, end=end) 66 | result = 0 67 | for one_day in chart: 68 | if pair.startswith("BTC_"): 69 | result += one_day['volume'] 70 | else: 71 | result += one_day["quoteVolume"] 72 | return result 73 | 74 | 75 | def topNVolume(self, n=5, order=True, minVolume=0): 76 | if minVolume == 0: 77 | r = self._df.loc[self._df['price'] > 2e-6] 78 | r = r.sort_values(by='volume', ascending=False)[:n] 79 | print(r) 80 | if order: 81 | return r 82 | else: 83 | return r.sort_index() 84 | else: 85 | return self._df[self._df.volume >= minVolume] 86 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/bnn.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import logging 4 | from scipy.optimize import minimize 5 | 6 | class BNN(TDAgent): 7 | ''' 8 | Non-parametric 9 | ''' 10 | def __init__(self, K=5, L=10, exp_w=None): 11 | super(BNN, self).__init__() 12 | self.K = K 13 | self.L = L 14 | self.exp_ret = np.ones((K,L+1)) 15 | self.exp_w = exp_w 16 | 17 | def get_b(self, x, last_b): 18 | self.record_history(x) 19 | 20 | data = self.history 21 | n, m = data.shape 22 | 23 | if self.exp_w is None: 24 | self.exp_w = np.ones((self.K*(self.L+1),m)) / m 25 | 26 | self.exp_w[self.K*self.L,:] = self.update(data, 0, 0) 27 | 28 | for k in np.arange(self.K): 29 | for l in np.arange(self.L): 30 | pl = 0.02+0.5*(l-1)/(self.L-1) 31 | self.exp_w[(k-1)*self.L+l,:] = self.update(data, k, pl) 32 | 33 | p = 1./(self.K*self.L) 34 | numerator = p * self.exp_ret[0,self.L] * self.exp_w[self.K*self.L,:] 35 | denominator = p * self.exp_ret[0, self.L] 36 | 37 | for k in np.arange(self.K): 38 | for l in np.arange(self.L): 39 | numerator += p*self.exp_ret[k, l] * self.exp_w[(k-1)*self.L+l,:] 40 | denominator += p*self.exp_ret[k,l] 41 | 42 | weight = numerator.T / denominator 43 | 44 | for k in np.arange(self.K): 45 | for l in np.arange(self.L): 46 | self.exp_ret[k,l] *= np.dot(self.history[-1,:], self.exp_w[(k-1)*self.L+l-1,:]) 47 | 48 | return weight 49 | 50 | def update(self, data, k, l): 51 | T, N = data.shape 52 | m = 0 53 | histdata = np.zeros((T,N)) 54 | 55 | if T <= k+1: 56 | return np.ones((1,N)) / N 57 | 58 | if k==0 and l==0: 
59 | histdata = data[:T,:] 60 | m = T 61 | else: 62 | normid = np.zeros((T-k,1)) 63 | histdata = data[:T,:] 64 | normid[:k] = 0 65 | for i in np.arange(k+1,T): 66 | data2 = data[i-k:i-1,:] - data[T-k+1:T,:] 67 | normid[:i] = np.sqrt(np.trace(data2.dot(data2.T))) 68 | sortpos = np.sort(normid) 69 | sortpos = sortpos.astype(int) 70 | m = int(np.floor(l*T)) 71 | for j in np.arange(m): 72 | histdata = np.vstack((histdata,histdata[int(sortpos[j]),:])) 73 | if m == 0: 74 | return np.ones((1,N)) / N 75 | 76 | b = opt_weights(histdata) 77 | return b 78 | 79 | def opt_weights(X, max_leverage=1): 80 | x_0 = max_leverage * np.ones(X.shape[1]) / float(X.shape[1]) 81 | objective = lambda b: -np.sum(np.log(np.maximum(np.dot(X-1, b)+1,1e-4))) 82 | cons = ({'type': 'eq', 'fun': lambda b: max_leverage-sum(b)},) 83 | bnds = [(0., max_leverage)]*len(x_0) 84 | while True: 85 | res = minimize(objective, x_0, bounds=bnds, constraints=cons, method='slsqp') 86 | eps = 1e-7 87 | if (res.x < 0-eps).any() or (res.x > max_leverage+eps).any(): 88 | X = X + np.random.randn(1)[0] * 1e-5 89 | logging.debug('Optimal weights not found, trying again') 90 | continue 91 | elif res.success: 92 | break 93 | else: 94 | if np.isnan(res.x).any(): 95 | logging.warning('Solution not found') 96 | res.x = np.ones(X.shape[1]) / X.shape[1] 97 | else: 98 | logging.warning("converged but not successfully") 99 | break 100 | 101 | return res.x 102 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/anticor1.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import logging 4 | 5 | class ANTICOR1(TDAgent): 6 | ''' 7 | anti-correlation olps 8 | ''' 9 | def __init__(self, window=30, exp_w=None): 10 | super(ANTICOR1, self).__init__() 11 | self.window = window 12 | self.exp_ret = np.ones((window-1,1)) 13 | self.exp_w = exp_w 14 | 15 | def decide_by_history(self, x, last_b): 16 | self.record_history(x) 17 | n, m = self.history.shape 18 | if self.exp_w is None: 19 | self.exp_w = np.ones((self.window-1,m)) / m 20 | 21 | for k in np.arange(1,self.window): 22 | self.exp_w[k-1,:] = self.update(self.history, self.exp_w[k-1,:], k+1) 23 | numerator = 0 24 | denominator = 0 25 | 26 | for k in np.arange(1,self.window): 27 | numerator += self.exp_ret[k-1] * self.exp_w[k-1,:] 28 | denominator += self.exp_ret[k-1] 29 | 30 | weight = numerator.T / denominator 31 | 32 | for k in np.arange(1, self.window): 33 | self.exp_ret[k-1] = np.dot(self.exp_ret[k-1]*self.history[-1,:], self.exp_w[k-1,:].T) 34 | 35 | self.exp_ret[:,0] /= np.sum(self.exp_ret[:,0]) 36 | 37 | return weight 38 | 39 | def update(self, data,last_b, w): 40 | T, N = data.shape 41 | b = last_b 42 | 43 | if T >= 2*w : 44 | data1 = data[T-2*w:T-w,:] 45 | data2 = data[T-w:T,:] 46 | #print data1 47 | LX1 = np.log(data1) 48 | LX2 = np.log(data2) 49 | 50 | mu1 = np.mean(LX1, axis=0) 51 | mu2 = np.mean(LX2, axis=0) 52 | 53 | n_LX1 = LX1 - mu1 54 | n_LX2 = LX2 - mu2 55 | 56 | 57 | sig1 = np.diag(np.dot(n_LX1.T, n_LX1).T) / (w-1) 58 | sig2 = np.diag(np.dot(n_LX2.T, n_LX2).T) / (w-1) 59 | 60 | sig1 = sig1[:,None] 61 | sig2 = sig2[:,None] 62 | 63 | sigma = np.dot(sig1,sig2.T) #(N,N) 64 | 65 | mCov = n_LX1.T.dot(n_LX2) / (w-1) 66 | mCorr = np.zeros((N,N)) 67 | 68 | mCorr = np.zeros((N,N)) 69 | new_sigma = np.multiply(sigma, sigma!=0) 70 | new_sigma_zero_index = new_sigma==0 71 | new_sigma[new_sigma_zero_index] = 1e-8 72 | mCorr = np.multiply(mCov, sigma!=0) 
/ np.sqrt(new_sigma) 73 | 74 | claim = np.zeros((N,N)) 75 | w_mu2 = np.tile(mu2[None,...].T, (1,N)) 76 | w_mu1 = np.tile(mu2[None,...], (N,1)) 77 | 78 | s12 = np.multiply(w_mu2 >= w_mu1, mCorr>0) 79 | claim = np.multiply(claim, s12) + np.multiply(mCorr, s12) 80 | 81 | diag_mCorr = np.diag(mCorr) 82 | cor1 = np.maximum(0, np.tile(-diag_mCorr[...,None], (1,N))) 83 | cor2 = np.maximum(0, np.tile(-diag_mCorr[...,None].T, (N,1))) 84 | claim += np.multiply(cor1, s12) + np.multiply(cor2, s12) 85 | claim = np.multiply(claim, s12) 86 | 87 | transfer = np.zeros((N,N)) 88 | s_claim = np.sum(claim, axis=1) 89 | sum_claim = np.tile(s_claim[...,None],(1,N)) 90 | 91 | 92 | s1 = np.absolute(sum_claim) > 0 93 | 94 | w_b = np.tile(b[...,None], (1,N)) 95 | mul_bc = np.multiply(w_b, s1) * np.multiply(claim, s1) 96 | deno = np.multiply(sum_claim, s1) 97 | deno_zero_index = deno==0 98 | deno[deno_zero_index] = 1e-8 99 | transfer = np.divide(mul_bc, deno) 100 | transfer = np.where(np.isnan(transfer), 0, transfer) 101 | 102 | transfer_ij = transfer.T - transfer 103 | sum_ij = np.sum(transfer_ij, axis=0) 104 | 105 | b = np.subtract(b, sum_ij.T) 106 | 107 | return b 108 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/bk.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import logging 4 | from scipy.optimize import minimize 5 | 6 | class BK(TDAgent): 7 | ''' 8 | anti-correlation olps 9 | ''' 10 | def __init__(self, K=5, L=10, c=1, exp_w=None): 11 | super(BK, self).__init__() 12 | self.K = K 13 | self.L = L 14 | self.c = c 15 | self.exp_ret = np.ones((K,L+1)) 16 | self.exp_w = exp_w 17 | 18 | def decide_by_history(self, x, last_b): 19 | self.record_history(x) 20 | 21 | data = self.history 22 | 23 | n, m = data.shape 24 | 25 | if self.exp_w is None: 26 | self.exp_w = np.ones((self.K*(self.L+1),m)) / m 27 | 28 | self.exp_w[self.K*self.L,:] = self.update(data, 0, 0, self.c) 29 | 30 | for k in np.arange(self.K): 31 | for l in np.arange(self.L): 32 | self.exp_w[(k-1)*self.L+l,:] = self.update(data, k, l, self.c) 33 | 34 | p = 1./(self.K*self.L) 35 | numerator = p * self.exp_ret[0,self.L] * self.exp_w[self.K*self.L,:] 36 | denominator = p * self.exp_ret[0, self.L] 37 | 38 | for k in np.arange(self.K): 39 | for l in np.arange(self.L): 40 | numerator += p*self.exp_ret[k, l] * self.exp_w[(k-1)*self.L+l,:] 41 | denominator += p*self.exp_ret[k,l] 42 | 43 | weight = numerator.T / denominator 44 | 45 | self.exp_ret[0, self.L] *= np.dot(self.history[-1,:], self.exp_w[self.K*self.L,:].T) 46 | 47 | for k in np.arange(self.K): 48 | for l in np.arange(self.L): 49 | self.exp_ret[k,l] *= np.dot(self.history[-1,:], self.exp_w[(k-1)*self.L+l,:]) 50 | 51 | 52 | return weight 53 | 54 | def update(self, data, k, l, c): 55 | ''' 56 | :param w: window sze 57 | :param c: correlation coefficient threshold 58 | ''' 59 | T, N = data.shape 60 | m = -1 61 | histdata = np.zeros((T,N)) 62 | 63 | if T <= k+1: 64 | return np.ones(N) / N 65 | 66 | if k==0 and l==0: 67 | histdata = data[:T,:] 68 | m = T 69 | else: 70 | for i in np.arange(k+1, T): 71 | #print 'i is %d k is %d T is %d\n' % (i,k,T) 72 | data2 = data[i-k-1:i,:] - data[T-k-1:T,:] 73 | #print data2 74 | 75 | if np.sqrt(np.trace(np.dot(data2,data2.T))) <= c/l: 76 | m += 1 77 | histdata[m,:] = data[i,:] #minus one to avoid out of bounds issue 78 | 79 | if m==-1: 80 | return np.ones(N) / N 81 | 82 | b = opt_weights(histdata[:m+1,:]) 83 | 
#print b 84 | #print 'w is %d\t T is %d\n' % (w,T) 85 | return b 86 | 87 | def opt_weights(X, max_leverage=1): 88 | x_0 = max_leverage * np.ones(X.shape[1]) / float(X.shape[1]) 89 | objective = lambda b: -np.sum(np.log(np.maximum(np.dot(X-1, b)+1,1e-4))) 90 | cons = ({'type': 'eq', 'fun': lambda b: max_leverage-sum(b)},) 91 | bnds = [(0., max_leverage)]*len(x_0) 92 | while True: 93 | res = minimize(objective, x_0, bounds=bnds, constraints=cons, method='slsqp') 94 | eps = 1e-7 95 | if (res.x < 0-eps).any() or (res.x > max_leverage+eps).any(): 96 | X = X + np.random.randn(1)[0] * 1e-5 97 | logging.debug('Optimal weights not found, trying again') 98 | continue 99 | elif res.success: 100 | break 101 | else: 102 | if np.isnan(res.x).any(): 103 | logging.warning('Solution not found') 104 | res.x = np.ones(X.shape[1]) / X.shape[1] 105 | else: 106 | logging.warning("converged but not successfully") 107 | break 108 | 109 | return res.x 110 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/anticor2.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import logging 4 | 5 | class ANTICOR2(TDAgent): 6 | ''' 7 | anti-correlation 8 | equals to anticor-anticor in olps 9 | ''' 10 | def __init__(self, window=30, exp_w=None, data_day=None): 11 | super(ANTICOR2, self).__init__() 12 | self.window = window 13 | self.exp_ret = np.ones((window-1,1)) 14 | self.exp_w = exp_w 15 | self.exp_ret2 = np.ones((window-1,1)) 16 | self.exp_w2 = np.ones((window-1, window-1)) / (window-1) 17 | self.data_day = data_day 18 | 19 | def decide_by_history(self, x, last_b): 20 | self.record_history(x) 21 | n, m = self.history.shape 22 | 23 | if self.exp_w is None: 24 | self.exp_w = np.ones((self.window-1,m)) / m 25 | 26 | if self.data_day is None: 27 | self.data_day = np.zeros((1,self.window-1)) 28 | mid = np.dot(self.history[-1,:], self.exp_w.T) 29 | self.data_day = mid[None,...] 
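# data_day stacks, period by period, the price relatives achieved by each
# window-expert portfolio (history[-1] dot exp_w); the second-layer weights
# exp_w2 are then updated on this series as if it were a market of experts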
30 | 31 | 32 | for k in np.arange(1,self.window): 33 | self.exp_w[k-1,:] = self.update(self.history, self.exp_w[k-1,:], k+1) 34 | self.exp_w2[k-1,:] = self.update(self.data_day, self.exp_w2[k-1,:],k+1) 35 | 36 | 37 | 38 | numerator = 0 39 | denominator = 0 40 | 41 | for k in np.arange(1,self.window): 42 | numerator += self.exp_ret2[k-1] * self.exp_w2[k-1,:] 43 | denominator += self.exp_ret2[k-1] 44 | 45 | weight1 = numerator.T / denominator 46 | weight = self.exp_w.T.dot(weight1) 47 | 48 | if n>0: 49 | mid = np.dot(self.history[-1,:], self.exp_w.T) 50 | self.data_day = np.vstack((self.data_day, mid[None,...])) 51 | 52 | for k in np.arange(1, self.window): 53 | self.exp_w[k-1,:] *= self.history[-1,:] / self.data_day[-1,k-1] 54 | self.exp_ret2[k-1] *= self.data_day[-1,:].dot(self.exp_w2[k-1,:].T) 55 | self.exp_w2[k-1,:] *= self.data_day[-1,:] / np.dot(self.data_day[-1,:], self.exp_w2[k-1,:].T) 56 | 57 | 58 | return weight 59 | 60 | def update(self, data,last_b, w): 61 | T, N = data.shape 62 | b = last_b 63 | 64 | if T >= 2*w : 65 | data1 = data[T-2*w:T-w,:] 66 | data2 = data[T-w:T,:] 67 | LX1 = np.log(data1) 68 | LX2 = np.log(data2) 69 | 70 | mu1 = np.mean(LX1, axis=0) 71 | mu2 = np.mean(LX2, axis=0) 72 | 73 | n_LX1 = LX1 - mu1 74 | n_LX2 = LX2 - mu2 75 | 76 | 77 | sig1 = np.diag(np.dot(n_LX1.T, n_LX1).T) / (w-1) 78 | sig2 = np.diag(np.dot(n_LX2.T, n_LX2).T) / (w-1) 79 | 80 | sig1 = sig1[:,None] 81 | sig2 = sig2[:,None] 82 | 83 | sigma = np.dot(sig1,sig2.T) #(N,N) 84 | 85 | mCov = n_LX1.T.dot(n_LX2) / (w-1) 86 | mCorr = np.zeros((N,N)) 87 | 88 | mCorr = np.zeros((N,N)) 89 | mCorr = np.multiply(mCov, sigma!=0) / np.sqrt(np.multiply(sigma, sigma!=0)) 90 | 91 | claim = np.zeros((N,N)) 92 | w_mu2 = np.tile(mu2[None,...].T, (1,N)) 93 | w_mu1 = np.tile(mu2[None,...], (N,1)) 94 | 95 | s12 = np.multiply(w_mu2 >= w_mu1, mCorr>0) 96 | claim = np.multiply(claim, s12) + np.multiply(mCorr, s12) 97 | 98 | diag_mCorr = np.diag(mCorr) 99 | cor1 = np.maximum(0, np.tile(-diag_mCorr[...,None], (1,N))) 100 | cor2 = np.maximum(0, np.tile(-diag_mCorr[...,None].T, (N,1))) 101 | claim += np.multiply(cor1, s12) + np.multiply(cor2, s12) 102 | claim = np.multiply(claim, s12) 103 | 104 | transfer = np.zeros((N,N)) 105 | s_claim = np.sum(claim, axis=1) 106 | sum_claim = np.tile(s_claim[...,None],(1,N)) 107 | 108 | 109 | s1 = np.absolute(sum_claim) > 0 110 | 111 | w_b = np.tile(b[...,None], (1,N)) 112 | mul_bc = np.multiply(w_b, s1) * np.multiply(claim, s1) 113 | deno = np.multiply(sum_claim, s1) 114 | transfer = np.divide(mul_bc, deno) 115 | transfer = np.where(np.isnan(transfer), 0, transfer) 116 | 117 | transfer_ij = transfer.T - transfer 118 | sum_ij = np.sum(transfer_ij, axis=0) 119 | 120 | b = np.subtract(b, sum_ij.T) 121 | 122 | return b 123 | -------------------------------------------------------------------------------- /pgportfolio/tools/configprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import sys 3 | import time 4 | from datetime import datetime 5 | import json 6 | import os 7 | rootpath = os.path.dirname(os.path.abspath(__file__)).\ 8 | replace("\\pgportfolio\\tools", "").replace("/pgportfolio/tools","") 9 | 10 | try: 11 | unicode # Python 2 12 | except NameError: 13 | unicode = str # Python 3 14 | 15 | 16 | def preprocess_config(config): 17 | fill_default(config) 18 | if sys.version_info[0] == 2: 19 | return byteify(config) 20 | else: 21 | return config 22 | 23 | 24 | def 
fill_default(config): 25 | set_missing(config, "random_seed", 0) 26 | set_missing(config, "agent_type", "NNAgent") 27 | fill_layers_default(config["layers"]) 28 | fill_input_default(config["input"]) 29 | fill_train_config(config["training"]) 30 | 31 | 32 | def fill_train_config(train_config): 33 | set_missing(train_config, "fast_train", True) 34 | set_missing(train_config, "decay_rate", 1.0) 35 | set_missing(train_config, "decay_steps", 50000) 36 | 37 | 38 | def fill_input_default(input_config): 39 | set_missing(input_config, "save_memory_mode", False) 40 | set_missing(input_config, "portion_reversed", False) 41 | set_missing(input_config, "market", "poloniex") 42 | set_missing(input_config, "norm_method", "absolute") 43 | set_missing(input_config, "is_permed", False) 44 | set_missing(input_config, "fake_ratio", 1) 45 | 46 | 47 | def fill_layers_default(layers): 48 | for layer in layers: 49 | if layer["type"] == "ConvLayer": 50 | set_missing(layer, "padding", "valid") 51 | set_missing(layer, "strides", [1, 1]) 52 | set_missing(layer, "activation_function", "relu") 53 | set_missing(layer, "regularizer", None) 54 | set_missing(layer, "weight_decay", 0.0) 55 | elif layer["type"] == "EIIE_Dense": 56 | set_missing(layer, "activation_function", "relu") 57 | set_missing(layer, "regularizer", None) 58 | set_missing(layer, "weight_decay", 0.0) 59 | elif layer["type"] == "DenseLayer": 60 | set_missing(layer, "activation_function", "relu") 61 | set_missing(layer, "regularizer", None) 62 | set_missing(layer, "weight_decay", 0.0) 63 | elif layer["type"] == "EIIE_LSTM" or layer["type"] == "EIIE_RNN": 64 | set_missing(layer, "dropouts", None) 65 | elif layer["type"] == "EIIE_Output" or\ 66 | layer["type"] == "Output_WithW" or\ 67 | layer["type"] == "EIIE_Output_WithW": 68 | set_missing(layer, "regularizer", None) 69 | set_missing(layer, "weight_decay", 0.0) 70 | elif layer["type"] == "DropOut": 71 | pass 72 | else: 73 | raise ValueError("layer name {} not supported".format(layer["type"])) 74 | 75 | 76 | def set_missing(config, name, value): 77 | if name not in config: 78 | config[name] = value 79 | 80 | 81 | def byteify(input): 82 | if isinstance(input, dict): 83 | return {byteify(key): byteify(value) 84 | for key, value in input.iteritems()} 85 | elif isinstance(input, list): 86 | return [byteify(element) for element in input] 87 | elif isinstance(input, unicode): 88 | return str(input) 89 | else: 90 | return input 91 | 92 | 93 | def parse_time(time_string): 94 | return time.mktime(datetime.strptime(time_string, "%Y/%m/%d").timetuple()) 95 | 96 | 97 | def load_config(index=None): 98 | """ 99 | @:param index: if None, load the default in pgportfolio; 100 | if a integer, load the config under train_package 101 | """ 102 | if index: 103 | with open(rootpath+"/train_package/" + str(index) + "/net_config.json") as file: 104 | config = json.load(file) 105 | else: 106 | with open(rootpath+"/pgportfolio/" + "net_config.json") as file: 107 | config = json.load(file) 108 | return preprocess_config(config) 109 | 110 | 111 | def check_input_same(config1, config2): 112 | input1 = config1["input"] 113 | input2 = config2["input"] 114 | if input1["start_date"] != input2["start_date"]: 115 | return False 116 | elif input1["end_date"] != input2["end_date"]: 117 | return False 118 | elif input1["test_portion"] != input2["test_portion"]: 119 | return False 120 | else: 121 | return True 122 | 123 | -------------------------------------------------------------------------------- /pgportfolio/tools/data.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division,absolute_import,print_function 2 | import numpy as np 3 | import pandas as pd 4 | 5 | 6 | def pricenorm3d(m, features, norm_method, fake_ratio=1.0, with_y=True): 7 | """normalize the price tensor, whose shape is [features, coins, windowsize] 8 | @:param m: input tensor, unnormalized and there could be nan in it 9 | @:param with_y: if the tensor include y (future price) 10 | logging.debug("price are %s" % (self._latest_price_matrix[0, :, -1])) 11 | """ 12 | result = m.copy() 13 | if features[0] != "close": 14 | raise ValueError("first feature must be close") 15 | for i, feature in enumerate(features): 16 | if with_y: 17 | one_position = 2 18 | else: 19 | one_position = 1 20 | pricenorm2d(result[i], m[0, :, -one_position], norm_method=norm_method, 21 | fake_ratio=fake_ratio, one_position=one_position) 22 | return result 23 | 24 | 25 | # input m is a 2d matrix, (coinnumber+1) * windowsize 26 | def pricenorm2d(m, reference_column, 27 | norm_method="absolute", fake_ratio=1.0, one_position=2): 28 | if norm_method=="absolute": 29 | output = np.zeros(m.shape) 30 | for row_number, row in enumerate(m): 31 | if np.isnan(row[-one_position]) or np.isnan(reference_column[row_number]): 32 | row[-one_position] = 1.0 33 | for index in range(row.shape[0] - one_position + 1): 34 | if index > 0: 35 | row[-one_position - index] = row[-index - one_position + 1] / fake_ratio 36 | row[-one_position] = 1.0 37 | row[-1] = fake_ratio 38 | else: 39 | row = row / reference_column[row_number] 40 | for index in range(row.shape[0] - one_position + 1): 41 | if index > 0 and np.isnan(row[-one_position - index]): 42 | row[-one_position - index] = row[-index - one_position + 1] / fake_ratio 43 | if np.isnan(row[-1]): 44 | row[-1] = fake_ratio 45 | output[row_number] = row 46 | m[:] = output[:] 47 | elif norm_method=="relative": 48 | output = m[:, 1:] 49 | divisor = m[:, :-1] 50 | output = output / divisor 51 | pad = np.empty((m.shape[0], 1,)) 52 | pad.fill(np.nan) 53 | m[:] = np.concatenate((pad, output), axis=1) 54 | m[np.isnan(m)] = fake_ratio 55 | else: 56 | raise ValueError("there is no norm morthod called %s" % norm_method) 57 | 58 | 59 | def get_chart_until_success(polo, pair, start, period, end): 60 | is_connect_success = False 61 | chart = {} 62 | while not is_connect_success: 63 | try: 64 | chart = polo.marketChart(pair=pair, start=int(start), period=int(period), end=int(end)) 65 | is_connect_success = True 66 | except Exception as e: 67 | print(e) 68 | return chart 69 | 70 | 71 | def get_type_list(feature_number): 72 | """ 73 | :param feature_number: an int indicates the number of features 74 | :return: a list of features n 75 | """ 76 | if feature_number == 1: 77 | type_list = ["close"] 78 | elif feature_number == 2: 79 | type_list = ["close", "volume"] 80 | raise NotImplementedError("the feature volume is not supported currently") 81 | elif feature_number == 3: 82 | type_list = ["close", "high", "low"] 83 | elif feature_number == 4: 84 | type_list = ["close", "high", "low", "open"] 85 | else: 86 | raise ValueError("feature number could not be %s" % feature_number) 87 | return type_list 88 | 89 | 90 | def panel2array(panel): 91 | """convert the panel to datatensor (numpy array) without btc 92 | """ 93 | without_btc = np.transpose(panel.values, axes=(2, 0, 1)) 94 | return without_btc 95 | 96 | 97 | def count_periods(start, end, period_length): 98 | """ 99 | :param start: unix time, excluded 100 | 
:param end: unix time, included 101 | :param period_length: length of the period 102 | :return: 103 | """ 104 | return (int(end)-int(start)) // period_length 105 | 106 | 107 | def get_volume_forward(time_span, portion, portion_reversed): 108 | volume_forward = 0 109 | if not portion_reversed: 110 | volume_forward = time_span*portion 111 | return volume_forward 112 | 113 | 114 | def panel_fillna(panel, type="bfill"): 115 | """ 116 | fill nan along the 3rd axis 117 | :param panel: the panel to be filled 118 | :param type: bfill or ffill 119 | """ 120 | frames = {} 121 | for item in panel.items: 122 | if type == "both": 123 | frames[item] = panel.loc[item].fillna(axis=1, method="bfill").\ 124 | fillna(axis=1, method="ffill") 125 | else: 126 | frames[item] = panel.loc[item].fillna(axis=1, method=type) 127 | return pd.Panel(frames) 128 | 129 | -------------------------------------------------------------------------------- /pgportfolio/trade/trader.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import pandas as pd 4 | from pgportfolio.learn.rollingtrainer import RollingTrainer 5 | import logging 6 | import time 7 | 8 | 9 | class Trader: 10 | def __init__(self, waiting_period, config, total_steps, net_dir, agent=None, initial_BTC=1.0, agent_type="nn"): 11 | """ 12 | @:param agent_type: string, could be nn or traditional 13 | @:param agent: the traditional agent object, if the agent_type is traditional 14 | """ 15 | self._steps = 0 16 | self._total_steps = total_steps 17 | self._period = waiting_period 18 | self._agent_type = agent_type 19 | 20 | if agent_type == "traditional": 21 | config["input"]["feature_number"] = 1 22 | config["input"]["norm_method"] = "relative" 23 | self._norm_method = "relative" 24 | elif agent_type == "nn": 25 | self._rolling_trainer = RollingTrainer(config, net_dir, agent=agent) 26 | self._coin_name_list = self._rolling_trainer.coin_list 27 | self._norm_method = config["input"]["norm_method"] 28 | if not agent: 29 | agent = self._rolling_trainer.agent 30 | else: 31 | raise ValueError() 32 | self._agent = agent 33 | 34 | # the total assets is calculated with BTC 35 | self._total_capital = initial_BTC 36 | self._window_size = config["input"]["window_size"] 37 | self._coin_number = config["input"]["coin_number"] 38 | self._commission_rate = config["trading"]["trading_consumption"] 39 | self._fake_ratio = config["input"]["fake_ratio"] 40 | self._asset_vector = np.zeros(self._coin_number+1) 41 | 42 | self._last_omega = np.zeros((self._coin_number+1,)) 43 | self._last_omega[0] = 1.0 44 | 45 | if self.__class__.__name__=="BackTest": 46 | # self._initialize_logging_data_frame(initial_BTC) 47 | self._logging_data_frame = None 48 | # self._disk_engine = sqlite3.connect('./database/back_time_trading_log.db') 49 | # self._initialize_data_base() 50 | self._current_error_state = 'S000' 51 | self._current_error_info = '' 52 | 53 | def _initialize_logging_data_frame(self, initial_BTC): 54 | logging_dict = {'Total Asset (BTC)': initial_BTC, 'BTC': 1} 55 | for coin in self._coin_name_list: 56 | logging_dict[coin] = 0 57 | self._logging_data_frame = pd.DataFrame(logging_dict, index=pd.to_datetime([time.time()], unit='s')) 58 | 59 | def generate_history_matrix(self): 60 | """override this method to generate the input of agent 61 | """ 62 | pass 63 | 64 | def finish_trading(self): 65 | pass 66 | 67 | # add trading data into the pandas data frame 68 | def 
_log_trading_info(self, time, omega): 69 | time_index = pd.to_datetime([time], unit='s') 70 | if self._steps > 0: 71 | logging_dict = {'Total Asset (BTC)': self._total_capital, 'BTC': omega[0, 0]} 72 | for i in range(len(self._coin_name_list)): 73 | logging_dict[self._coin_name_list[i]] = omega[0, i + 1] 74 | new_data_frame = pd.DataFrame(logging_dict, index=time_index) 75 | self._logging_data_frame = self._logging_data_frame.append(new_data_frame) 76 | 77 | def trade_by_strategy(self, omega): 78 | """execute the trading to the position, represented by the portfolio vector w 79 | """ 80 | pass 81 | 82 | def rolling_train(self): 83 | """ 84 | execute rolling train 85 | """ 86 | pass 87 | 88 | def __trade_body(self): 89 | self._current_error_state = 'S000' 90 | starttime = time.time() 91 | omega = self._agent.decide_by_history(self.generate_history_matrix(), 92 | self._last_omega.copy()) 93 | self.trade_by_strategy(omega) 94 | if self._agent_type == "nn": 95 | self.rolling_train() 96 | if not self.__class__.__name__=="BackTest": 97 | self._last_omega = omega.copy() 98 | logging.info('total assets are %3f BTC' % self._total_capital) 99 | logging.debug("="*30) 100 | trading_time = time.time() - starttime 101 | if trading_time < self._period: 102 | logging.info("sleep for %s seconds" % (self._period - trading_time)) 103 | self._steps += 1 104 | return self._period - trading_time 105 | 106 | def start_trading(self): 107 | try: 108 | if not self.__class__.__name__=="BackTest": 109 | current = int(time.time()) 110 | wait = self._period - (current%self._period) 111 | logging.info("sleep for %s seconds" % wait) 112 | time.sleep(wait+2) 113 | 114 | while self._steps < self._total_steps: 115 | sleeptime = self.__trade_body() 116 | time.sleep(sleeptime) 117 | else: 118 | while self._steps < self._total_steps: 119 | self.__trade_body() 120 | finally: 121 | if self._agent_type=="nn": 122 | self._agent.recycle() 123 | self.finish_trading() 124 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import json 3 | import logging 4 | import os 5 | import time 6 | from argparse import ArgumentParser 7 | from datetime import datetime 8 | 9 | from pgportfolio.tools.configprocess import preprocess_config 10 | from pgportfolio.tools.configprocess import load_config 11 | from pgportfolio.tools.trade import save_test_data 12 | from pgportfolio.tools.shortcut import execute_backtest 13 | from pgportfolio.resultprocess import plot 14 | 15 | 16 | def build_parser(): 17 | parser = ArgumentParser() 18 | parser.add_argument("--mode",dest="mode", 19 | help="start mode, train, generate, download_data" 20 | " backtest", 21 | metavar="MODE", default="train") 22 | parser.add_argument("--processes", dest="processes", 23 | help="number of processes you want to start to train the network", 24 | default="1") 25 | parser.add_argument("--repeat", dest="repeat", 26 | help="repeat times of generating training subfolder", 27 | default="1") 28 | parser.add_argument("--algo", 29 | help="algo name or indexes of training_package ", 30 | dest="algo") 31 | parser.add_argument("--algos", 32 | help="algo names or indexes of training_package, seperated by \",\"", 33 | dest="algos") 34 | parser.add_argument("--labels", dest="labels", 35 | help="names that will shown in the figure caption or table header") 36 | parser.add_argument("--format", dest="format", default="raw", 37 | 
help="format of the table printed") 38 | parser.add_argument("--device", dest="device", default="cpu", 39 | help="device to be used to train") 40 | parser.add_argument("--folder", dest="folder", type=int, 41 | help="folder(int) to load the config, neglect this option if loading from ./pgportfolio/net_config") 42 | return parser 43 | 44 | 45 | def main(): 46 | parser = build_parser() 47 | options = parser.parse_args() 48 | if not os.path.exists("./" + "train_package"): 49 | os.makedirs("./" + "train_package") 50 | if not os.path.exists("./" + "database"): 51 | os.makedirs("./" + "database") 52 | 53 | if options.mode == "train": 54 | import pgportfolio.autotrain.training 55 | if not options.algo: 56 | pgportfolio.autotrain.training.train_all(int(options.processes), options.device) 57 | else: 58 | for folder in options.folder: 59 | raise NotImplementedError() 60 | elif options.mode == "generate": 61 | import pgportfolio.autotrain.generate as generate 62 | logging.basicConfig(level=logging.INFO) 63 | generate.add_packages(load_config(), int(options.repeat)) 64 | elif options.mode == "download_data": 65 | from pgportfolio.marketdata.datamatrices import DataMatrices 66 | with open("./pgportfolio/net_config.json") as file: 67 | config = json.load(file) 68 | config = preprocess_config(config) 69 | start = time.mktime(datetime.strptime(config["input"]["start_date"], "%Y/%m/%d").timetuple()) 70 | end = time.mktime(datetime.strptime(config["input"]["end_date"], "%Y/%m/%d").timetuple()) 71 | DataMatrices(start=start, 72 | end=end, 73 | feature_number=config["input"]["feature_number"], 74 | window_size=config["input"]["window_size"], 75 | online=True, 76 | period=config["input"]["global_period"], 77 | volume_average_days=config["input"]["volume_average_days"], 78 | coin_filter=config["input"]["coin_number"], 79 | is_permed=config["input"]["is_permed"], 80 | test_portion=config["input"]["test_portion"], 81 | portion_reversed=config["input"]["portion_reversed"]) 82 | elif options.mode == "backtest": 83 | config = _config_by_algo(options.algo) 84 | _set_logging_by_algo(logging.DEBUG, logging.DEBUG, options.algo, "backtestlog") 85 | execute_backtest(options.algo, config) 86 | elif options.mode == "save_test_data": 87 | # This is used to export the test data 88 | save_test_data(load_config(options.folder)) 89 | elif options.mode == "plot": 90 | logging.basicConfig(level=logging.INFO) 91 | algos = options.algos.split(",") 92 | if options.labels: 93 | labels = options.labels.replace("_"," ") 94 | labels = labels.split(",") 95 | else: 96 | labels = algos 97 | plot.plot_backtest(load_config(), algos, labels) 98 | elif options.mode == "table": 99 | algos = options.algos.split(",") 100 | if options.labels: 101 | labels = options.labels.replace("_"," ") 102 | labels = labels.split(",") 103 | else: 104 | labels = algos 105 | plot.table_backtest(load_config(), algos, labels, format=options.format) 106 | 107 | def _set_logging_by_algo(console_level, file_level, algo, name): 108 | if algo.isdigit(): 109 | logging.basicConfig(filename="./train_package/"+algo+"/"+name, 110 | level=file_level) 111 | console = logging.StreamHandler() 112 | console.setLevel(console_level) 113 | logging.getLogger().addHandler(console) 114 | else: 115 | logging.basicConfig(level=console_level) 116 | 117 | 118 | def _config_by_algo(algo): 119 | """ 120 | :param algo: a string represent index or algo name 121 | :return : a config dictionary 122 | """ 123 | if not algo: 124 | raise ValueError("please input a specific algo") 125 | elif 
algo.isdigit(): 126 | config = load_config(algo) 127 | else: 128 | config = load_config() 129 | return config 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /pgportfolio/tdagent/algorithms/anticor_deprecated.py: -------------------------------------------------------------------------------- 1 | from ..tdagent import TDAgent 2 | import numpy as np 3 | import warnings 4 | import pandas as pd 5 | from pandas.stats.moments import rolling_corr 6 | 7 | class ANTICOR(TDAgent): 8 | """ Anticor (anti-correlation) is a heuristic portfolio selection algorithm. 9 | It adopts the consistency of positive lagged cross-correlation and negative 10 | autocorrelation to adjust the portfolio. Eventhough it has no known bounds and 11 | hence is not considered to be universal, it has very strong empirical results. 12 | 13 | It has implemented C version in scipy.weave to improve performance (around 10x speed up). 14 | Another option is to use Numba. 15 | 16 | Reference: 17 | A. Borodin, R. El-Yaniv, and V. Gogan. Can we learn to beat the best stock, 2005. 18 | http://www.cs.technion.ac.il/~rani/el-yaniv-papers/BorodinEG03.pdf 19 | """ 20 | 21 | def __init__(self, window=30, c_version=True): 22 | """ 23 | :param window: Window parameter. 24 | :param c_version: Use c_version, up to 10x speed-up. 25 | """ 26 | super(ANTICOR, self).__init__() 27 | self.window = window 28 | self.c_version = c_version 29 | 30 | 31 | def decide_by_history(self, x, last_b=None): 32 | self.record_history(x) 33 | window = self.window 34 | port = pd.DataFrame(self.history) 35 | n, m = port.shape 36 | weights = 1. / m * np.ones(port.shape) 37 | 38 | CORR, EX = rolling_corr(port, port.shift(window)) 39 | 40 | if self.c_version: 41 | try: 42 | from scipy import weave 43 | except ImportError: 44 | warnings.warn('scipy.weave is not available in python3, falling back to python version') 45 | self.c_version = False 46 | 47 | if self.c_version is False: 48 | for t in range(n - 1): 49 | M = CORR[t, :, :] 50 | mu = EX[t, :] 51 | 52 | # claim[i,j] is claim from stock i to j 53 | claim = np.zeros((m, m)) 54 | 55 | for i in range(m): 56 | for j in range(m): 57 | if i == j: continue 58 | 59 | if mu[i] > mu[j] and M[i, j] > 0: 60 | claim[i, j] += M[i, j] 61 | # autocorrelation 62 | if M[i, i] < 0: 63 | claim[i, j] += abs(M[i, i]) 64 | if M[j, j] < 0: 65 | claim[i, j] += abs(M[j, j]) 66 | 67 | # calculate transfer 68 | transfer = claim * 0. 
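# convert claims into weight transfers: stock i moves a fraction of its current
# weight weights[t, i] to stock j in proportion to claim[i, j] / sum(claim[i, :])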
69 | for i in range(m): 70 | total_claim = sum(claim[i, :]) 71 | if total_claim != 0: 72 | transfer[i, :] = weights[t, i] * claim[i, :] / total_claim 73 | 74 | # update weights 75 | weights[t + 1, :] = weights[t, :] + np.sum(transfer, axis=0) - np.sum(transfer, axis=1) 76 | 77 | else: 78 | def get_weights_c(c, mu, w): 79 | code = """ 80 | int t,i,j; 81 | float claim [Nc[1]] [Nc[1]]; 82 | float transfer [Nc[1]] [Nc[1]]; 83 | 84 | for (t=0; t MU2(t,j) && C3(t,i,j) > 0){ 97 | claim[i][j] += C3(t,i,j); 98 | if(C3(t,i,i) < 0) 99 | claim[i][j] -= C3(t,i,i); 100 | if(C3(t,j,j) < 0) 101 | claim[i][j] -= C3(t,j,j); 102 | } 103 | } 104 | } 105 | } 106 | 107 | for (i=0; i i: 78 | label = labels[i] 79 | else: 80 | label = NAMES[algos[i]] 81 | ax.semilogy(dates, pvs, linewidth=1, label=label) 82 | #ax.plot(dates, pvs, linewidth=1, label=label) 83 | 84 | plt.ylabel("portfolio value $p_t/p_0$", fontsize=12) 85 | plt.xlabel("time", fontsize=12) 86 | xfmt = mdates.DateFormatter("%m-%d %H:%M") 87 | ax.xaxis.set_major_locator(weeks) 88 | ax.xaxis.set_minor_locator(days) 89 | datemin = dates[0] 90 | datemax = dates[-1] 91 | ax.set_xlim(datemin, datemax) 92 | 93 | ax.xaxis.set_major_formatter(xfmt) 94 | plt.grid(True) 95 | plt.tight_layout() 96 | ax.legend(loc="upper left", prop={"size":10}) 97 | fig.autofmt_xdate() 98 | plt.savefig("result.eps", bbox_inches='tight', 99 | pad_inches=0) 100 | plt.show() 101 | 102 | 103 | def table_backtest(config, algos, labels=None, format="raw", 104 | indicators=list(INDICATORS.keys())): 105 | """ 106 | @:param config: config dictionary 107 | @:param algos: list of strings representing the name of algorithms 108 | or index of pgportfolio result 109 | @:param format: "raw", "html", "latex" or "csv". If it is "csv", 110 | the result will be save in a csv file. 
otherwise only print it out 111 | @:return: a string of html or latex code 112 | """ 113 | results = [] 114 | labels = list(labels) 115 | for i, algo in enumerate(algos): 116 | if algo.isdigit(): 117 | portfolio_changes = _load_from_summary(algo, config) 118 | logging.info("load index " + algo + " from csv file") 119 | else: 120 | logging.info("start executing " + algo) 121 | portfolio_changes = execute_backtest(algo, config) 122 | logging.info("finish executing " + algo) 123 | 124 | indicator_result = {} 125 | for indicator in indicators: 126 | indicator_result[indicator] = INDICATORS[indicator](portfolio_changes) 127 | results.append(indicator_result) 128 | if len(labels)<=i: 129 | labels.append(NAMES[algo]) 130 | 131 | dataframe = pd.DataFrame(results, index=labels) 132 | 133 | start, end = _extract_test(config) 134 | start = datetime.datetime.fromtimestamp(start - start%config["input"]["global_period"]) 135 | end = datetime.datetime.fromtimestamp(end - end%config["input"]["global_period"]) 136 | 137 | print("backtest start from "+ str(start) + " to " + str(end)) 138 | if format == "html": 139 | print(dataframe.to_html()) 140 | elif format == "latex": 141 | print(dataframe.to_latex()) 142 | elif format == "raw": 143 | print(dataframe.to_string()) 144 | elif format == "csv": 145 | dataframe.to_csv("./compare"+end.strftime("%Y-%m-%d")+".csv") 146 | else: 147 | raise ValueError("The format " + format + " is not supported") 148 | 149 | 150 | def _extract_test(config): 151 | global_start = parse_time(config["input"]["start_date"]) 152 | global_end = parse_time(config["input"]["end_date"]) 153 | span = global_end - global_start 154 | start = global_end - config["input"]["test_portion"] * span 155 | end = global_end 156 | return start, end 157 | 158 | 159 | def _load_from_summary(index, config): 160 | """ load the backtest result form train_package/train_summary 161 | @:param index: index of the training and backtest 162 | @:return: numpy array of the portfolio changes 163 | """ 164 | dataframe = pd.DataFrame.from_csv("./train_package/train_summary.csv") 165 | history_string = dataframe.loc[int(index)]["backtest_test_history"] 166 | if not check_input_same(config, json.loads(dataframe.loc[int(index)]["config"])): 167 | raise ValueError("the date of this index is not the same as the default config") 168 | return np.fromstring(history_string, sep=",")[:-1] 169 | 170 | -------------------------------------------------------------------------------- /user_guide.md: -------------------------------------------------------------------------------- 1 | # User Guide 2 | ## Configuration File 3 | Under the `nntrader/nntrader` directory, there is a json file called `net_config.json`, 4 | holding all the configuration of the agent and could be modified outside the program code. 
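Below is a minimal sketch (not part of the original workflow) of loading this configuration through the repository's own helper in `pgportfolio/tools/configprocess.py`; the printed fields are the ones documented in the sections that follow.

```python
# Minimal sketch: load net_config.json via the repository's helper and
# inspect a few documented fields. Passing an integer index instead of
# nothing would load train_package/<index>/net_config.json instead.
from pgportfolio.tools.configprocess import load_config

config = load_config()                    # default ./pgportfolio/net_config.json
print(config["input"]["coin_number"])     # number of traded assets (cash excluded)
print(config["input"]["window_size"])     # columns of the input price matrix
print(config["input"]["global_period"])   # trading/input period in seconds
print([layer["type"] for layer in config["layers"]])  # network topology
```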
5 | ### Network Topology
6 | * `"layers"`
7 | * list of the layers of the CNN, including the output layer
8 | * `"type"`
9 | * domain is {"ConvLayer", "FullyLayer", "DropOut", "MaxPooling",
10 | "AveragePooling", "LocalResponseNormalization", "SingleMachineOutput",
11 | "LSTMSingleMachine", "RNNSingleMachine"}
12 | * `"filter shape"`
13 | * shape of the filter (kernel) of the convolution layer
14 | * `"input"`
15 | * `"window_size"`
16 | * number of columns of the input matrix
17 | * `"coin_number"`
18 | * number of rows of the input matrix
19 | * `"feature_number"`
20 | * number of features (just like RGB channels in computer vision)
21 | * domain is {1, 2, 3}
22 | * 1 means the feature is ["close"], the last price of each period
23 | * 2 means the features are ["close", "volume"]
24 | * 3 means the features are ["close", "high", "low"]
25 | 
26 | ### Market Data
27 | * `"input"`
28 | * `"start_date"`
29 | * start date of the global data matrix
30 | * format is yyyy/MM/dd
31 | * `"end_date"`
32 | * end date of the global data matrix
33 | * format is yyyy/MM/dd
34 | * The performance can vary a lot across different time ranges.
35 | * `"volume_average_days"`
36 | * number of days of volume used to select the coins
37 | * `"test_portion"`
38 | * portion of backtest data, ranging from 0 to 1. The rest is training data.
39 | * `"global_period"`
40 | * trading period and the period of prices in the input window.
41 | * should be a multiple of 300 (seconds)
42 | * `"coin_number"`
43 | * number of assets to be traded.
44 | * does not include cash (i.e. BTC)
45 | * `"online"`
46 | * if it is not online, the program will select coins and generate inputs
47 | from the local database.
48 | * if it is online, new data that does not exist in the database will be saved
49 | 
50 | ## Training and Tuning the Hyper-parameters
51 | 1. First, modify the `nntrader/nntrader/net_config.json` file.
52 | 2. Make sure the current directory is `nntrader` and type `python main.py --mode=generate --repeat=1`
53 | * this will create 1 subfolder under `train_package`
54 | * in each subfolder, there is a copy of `net_config.json`
55 | * `--repeat=n`: n can be any positive integer. The random seeds of the subfolders are 0 to n-1, assigned sequentially.
56 | * Notably, the random seed can also affect the performance to a large extent.
57 | 3. Type `python main.py --mode=train --processes=1`
58 | * this will train the n folders created just now, one by one
59 | * do not start more than 1 process if you want to download data online
60 | * "--processes=n" means starting n processes that run in parallel.
61 | * add "--device=gpu" if your tensorflow installation supports GPU.
62 | * On a GTX1080Ti you should be able to run 4-5 training processes together.
63 | * On a GTX1060 you should be able to run 2-3 training processes together.
64 | * Each training process is made up of 2 stages:
65 | * Pre-training, log example:
66 | 
67 | 
68 | ```
69 | INFO:root:average time for data accessing is 0.00070324587822
70 | INFO:root:average time for training is 0.0032548391819
71 | INFO:root:==============================
72 | INFO:root:step 3000
73 | INFO:root:------------------------------
74 | INFO:root:the portfolio value on test set is 2.24213
75 | log_mean is 0.00029086
76 | loss_value is -0.000291
77 | log mean without commission fee is 0.000378
78 | 
79 | INFO:root:==============================
80 | 
81 | ```
82 | 
83 | 
84 | * Backtest with rolling train, log example:
85 | ```
86 | DEBUG:root:==============================
87 | INFO:root:the step is 1433
88 | INFO:root:total assets are 17.732482 BTC
89 | ```
90 | 4. After that, check the result summary of the training in `nntrader/train_package/train_summary.csv`
91 | 5. Tune the hyper-parameters based on the summary, and go back to step 1.
92 | 
93 | ## Logging
94 | There are three types of logs for each training.
95 | * In each subfolder
96 | * There is a text file called `programlog`, which is the log generated by the running program.
97 | * There is a `tensorboard` folder that saves data about the training process, which can be viewed with tensorboard.
98 | * type `tensorboard --logdir=train_package/1` to use tensorboard
99 | * The summary information of this training, including the network configuration, portfolio value on the validation set and test set etc., will be saved in `train_summary.csv` under the `train_package` folder
100 | 
101 | ## Save and Restore of the Model
102 | * The trained weights of the network are saved at `train_package/1`, named `netfile` (consisting of 3 files).
103 | 
104 | ## Download Data
105 | * Type `python main.py --mode=download_data` to download data without starting training
106 | * The program will use the configuration in `nntrader/nntrader/net_config` to select coins and
107 | download the data necessary to train the network.
108 | * The download speed can be very slow and errors sometimes occur in China.
109 | * For those who can't download data, please check the first release, where there is a `Data.db` file; put it in the `database` folder. Make sure `online` under `input` in `net_config.json` is `false` and run the example.
110 | * Note that when using this file, you shouldn't make any changes to the input data configuration (for example `start_date`, `end_date` or `coin_number`), otherwise incorrect results might be presented.
111 | 
112 | ## Back-test
113 | *Note: before back-testing, you need to successfully finish training the algo first.*
114 | * Type `python main.py --mode=backtest --algo=1` to execute a
115 | backtest with rolling train (i.e. online learning in supervised learning)
116 | on the target model.
112 | ## Back-test
113 | *Note: Before back-testing, you need to successfully finish training the algorithm first*
114 | * Type `python main.py --mode=backtest --algo=1` to execute
115 | a backtest with rolling train (i.e. online learning in the sense of supervised learning)
116 | on the target model.
117 | * `--algo` can be either the name of a traditional method or the index of a training folder
118 | 
119 | ## Traditional Agents
120 | OLPS summary:
121 | 
122 | ![](https://github.com/DexHunter/nntrader/blob/dev/images/olps_algo.png)
123 | 
124 | ## Plotting
125 | * type `python main.py --mode=plot --algos=crp,olmar,1 --labels=crp,olmar,nnagent`,
126 | for example, to plot
127 | * `--algos` can be names of the tdagent algorithms or
128 | the index of an nnagent
129 | * `--labels` are the names of the corresponding algorithms as they will be shown in the legend
130 | * result is
131 | ![](http://static.zybuluo.com/rooftrellen/u75egf9roy9c2sju48v6uu6o/result.png)
132 | 
133 | ## Present backtest results in a table
134 | * type `python main.py --mode=table --algos=1,olmar,ons --labels=nntrader,olmar,ons`
135 | * `--algos` and `--labels` are the same as in the plotting case
136 | * result:
137 | ```
138 | average max drawdown negative day negative periods negative week portfolio value positive periods postive day postive week sharpe ratio
139 | nntrader 1.001311 0.225874 781 1378 114 25.022516 1398 1995 2662 0.074854
140 | olmar 1.000752 0.604886 1339 1451 1217 4.392879 1319 1437 1559 0.035867
141 | ons 1.000231 0.217216 1144 1360 731 1.770931 1416 1632 2045 0.032605
142 | 
143 | ```
144 | * use the `--format` argument to change the format of the table;
145 | it can be `raw`, `html`, `csv` or `latex`. The default is `raw`.
146 | 
-------------------------------------------------------------------------------- /pgportfolio/tdagent/tdagent.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 | import logging
4 | from scipy.optimize import minimize
5 | from scipy.spatial.distance import cdist, euclidean
6 | 
7 | class TDAgent(object):
8 |     '''Traditional Agent.
9 |     Parent class for algorithms (new-style).
10 |     '''
11 | 
12 |     def __init__(self, history=None, cum_ret=None, last_b=None):
13 |         '''init
14 |         :param history: a history list of relative price vectors
15 |         :param last_b: last portfolio weight vector
16 |         '''
17 |         self.history = history
18 |         self.cum_ret = cum_ret
19 |         self.last_b = last_b
20 | 
21 |     @property
22 |     def agent(self):
23 |         return self._agent
24 | 
25 | 
26 |     def decide_by_history(self, x, last_b):
27 |         '''calculate the new portfolio weight vector.
28 |         :param x: input price matrix
29 |         :param last_b: last portfolio weight vector
30 |         '''
31 |         raise NotImplementedError('subclass must implement this!')
32 | 
33 |     def get_last_rpv(self, x):
34 |         '''remove the extra dimension of the input. Return the last relative price vector.
35 | :param x: matrix with shape (1, window_size, coin_number+1) 36 | ''' 37 | if x.ndim == 3: 38 | #print x.shape 39 | last_rpv = x[0,:,-1] # output a vector with shape (x.size,) 40 | else: 41 | last_rpv = x #if it has already been processed just return x 42 | return last_rpv 43 | 44 | def get_first_history(self, x): 45 | '''get history in first period 46 | :param x: input matrix 47 | ''' 48 | if x.ndim == 3: 49 | first = x[0,:,:] # array size (#assets, #periods) 50 | 51 | #return (#periods, #assets) for convention 52 | return first.T 53 | 54 | def record_history(self, x): 55 | nx = self.get_last_rpv(x) 56 | nx = np.reshape(nx, (1,nx.size)) 57 | if self.history is None: 58 | #self.history = self.get_first_history(x) 59 | self.history = nx 60 | else: 61 | self.history = np.vstack((self.history, nx)) 62 | 63 | def get_close(self): 64 | '''get close data from relative price 65 | :param x: relative price data 66 | ''' 67 | close = np.ones(self.history.shape) 68 | for i in range(1,self.history.shape[0]): 69 | close[i,:] = close[i-1] * self.history[i,:] 70 | return close 71 | 72 | def simplex_proj(self, y): 73 | '''projection of y onto simplex. ''' 74 | m = len(y) 75 | bget = False 76 | 77 | s = sorted(y, reverse = True) 78 | tmpsum = 0. 79 | 80 | for ii in range(m-1): 81 | tmpsum = tmpsum + s[ii] 82 | tmax = (tmpsum - 1) / (ii + 1) 83 | if tmax >= s[ii+1]: 84 | bget = True 85 | break 86 | 87 | if not bget: 88 | tmax = (tmpsum + s[m-1] - 1) / m 89 | 90 | return np.maximum(0, y-tmax) 91 | 92 | def get_last_return(self, last_b): 93 | '''Caulate daily retrun. No need to calculate transaction cost there. 94 | ''' 95 | last_x = self.history[-1,:] 96 | self.ret = last_b * last_x #element-wise 97 | return np.squeeze(self.ret) 98 | 99 | def cal_cum_ret(self, ret): 100 | '''Calculate the cumulative return. 
101 | :param ret: newest retrun 102 | ''' 103 | if self.cum_ret is None: 104 | self.cum_ret = ret 105 | else: 106 | self.cum_ret = self.cum_ret * ret #element-wise 107 | return self.cum_ret 108 | 109 | def find_bcrp(self, X, max_leverage=1): 110 | x_0 = max_leverage * np.ones(X.shape[1]) / np.float(X.shape[1]) 111 | objective = lambda b: -np.prod(np.dot(X, b)) 112 | cons = ({'type': 'eq', 'fun': lambda b: max_leverage - np.sum(b, axis=0)},) 113 | bnds = [(0., max_leverage)]*len(x_0) 114 | while True: 115 | res = minimize(objective, x_0, bounds=bnds, constraints=cons, method='slsqp') 116 | eps = 1e-7 117 | if (res.x < 0-eps).any() or (res.x > max_leverage+eps).any(): 118 | X = X + np.random.randn(1)[0] * 1e-5 119 | logging.debug('Optimal weights not found, trying again...') 120 | continue 121 | elif res.success: 122 | break 123 | else: 124 | if np.isnan(res.x).any(): 125 | logging.warning('Solution does not exist, use uniform portfolio weight vector.') 126 | res.x = np.ones(X.shape[1]) / X.shape[1] 127 | else: 128 | logging.warning('Converged but not successfully.') 129 | break 130 | 131 | return res.x 132 | 133 | 134 | def euclidean_proj_simplex(self, v, s=1): 135 | '''Compute the Euclidean projection on a positive simplex 136 | :param v: n-dimensional vector to project 137 | :param s: int, radius of the simple 138 | 139 | return w numpy array, Euclidean projection of v on the simplex 140 | 141 | Original author: John Duchi 142 | ''' 143 | assert s>0, "Radius s must be positive (%d <= 0)" % s 144 | 145 | n, = v.shape # raise ValueError if v is not 1D 146 | # check if already on the simplex 147 | if v.sum() == s and np.alltrue( v>= 0): 148 | return v 149 | 150 | # get the array of cumulaive sums of a sorted copy of v 151 | u = np.sort(v)[::-1] 152 | cssv = np.cumsum(u) 153 | # get the number of >0 components of the optimal solution 154 | rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1] 155 | # compute the Lagrange multiplier associated to the simplex constraint 156 | theta = (cssv[rho] - s) / (rho + 1.) 
157 | w = (v-theta).clip(min=0) 158 | return w 159 | 160 | def l1_median_VaZh(self, X, eps=1e-5): 161 | '''calculate the L1_median of X with the l1median_VaZh method 162 | ''' 163 | y = np.mean(X, 0) 164 | 165 | while True: 166 | D = cdist(X, [y]) 167 | nonzeros = (D != 0)[:, 0] 168 | 169 | Dinv = 1 / D[nonzeros] 170 | Dinvs = np.sum(Dinv) 171 | W = Dinv / Dinvs 172 | T = np.sum(W * X[nonzeros], 0) 173 | num_zeros = len(X) - np.sum(nonzeros) 174 | if num_zeros == 0: 175 | y1 = T 176 | elif num_zeros == len(X): 177 | return y 178 | else: 179 | R = (T - y) * Dinvs 180 | r = np.linalg.norm(R) 181 | rinv = 0 if r==0 else num_zeros/r 182 | y1 = max(0, 1-rinv)*T + min(1, rinv)*y 183 | 184 | if euclidean(y, y1) < eps: 185 | return y1 186 | 187 | y = y1 188 | 189 | def corn_expert(self, data, w, c): 190 | ''' 191 | :param w: window sze 192 | :param c: correlation coefficient threshold 193 | ''' 194 | T, N = data.shape 195 | m = 0 196 | histdata = np.zeros((T,N)) 197 | 198 | if T <= w+1: 199 | '''use uniform portfolio weight vector''' 200 | return np.ones(N) / N 201 | 202 | if w==0: 203 | histdata = data[:T,:] 204 | m = T 205 | else: 206 | for i in np.arange(w, T): 207 | d1 = data[i-w:i,:] 208 | d2 = data[T-w:T,:] 209 | datacorr = np.corrcoef(d1,d2)[0,1] 210 | 211 | if datacorr >= c: 212 | m += 1 213 | histdata[m,:] = data[i-1,:] #minus one to avoid out of bounds issue 214 | 215 | if m==0: 216 | return np.ones(N) / N 217 | 218 | #sqp according to OLPS implementation 219 | x_0 = np.ones((1,N)) / N 220 | objective = lambda b: -np.prod(np.dot(histdata, b)) 221 | cons = ({'type': 'eq', 'fun': lambda b: 1-np.sum(b, axis=0)},) 222 | bnds = [(0.,1)]*N 223 | while True: 224 | res = minimize(objective, x_0, bounds=bnds, constraints=cons, method='slsqp') 225 | eps = 1e-7 226 | if (res.x < 0-eps).any() or (res.x > 1+eps).any(): 227 | data += np.random.randn(1)[0] * 1e-5 228 | logging.debug('Optimal portfolio weight vector not found, trying again...') 229 | continue 230 | elif res.success: 231 | break 232 | else: 233 | if np.isnan(res.x).any(): 234 | logging.warning('Solution does not exist, use uniform pwv') 235 | res.x = np.ones(N) / N 236 | else: 237 | logging.warning('Converged but not successfully.') 238 | break 239 | 240 | return res.x 241 | -------------------------------------------------------------------------------- /pgportfolio/learn/network.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import print_function 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | import tensorflow as tf 7 | import tflearn 8 | 9 | 10 | class NeuralNetWork: 11 | def __init__(self, feature_number, rows, columns, layers, device): 12 | tf_config = tf.ConfigProto() 13 | self.session = tf.Session(config=tf_config) 14 | if device == "cpu": 15 | tf_config.gpu_options.per_process_gpu_memory_fraction = 0 16 | else: 17 | tf_config.gpu_options.per_process_gpu_memory_fraction = 0.2 18 | self.input_num = tf.placeholder(tf.int32, shape=[]) 19 | self.input_tensor = tf.placeholder(tf.float32, shape=[None, feature_number, rows, columns]) 20 | self.previous_w = tf.placeholder(tf.float32, shape=[None, rows]) 21 | self._rows = rows 22 | self._columns = columns 23 | 24 | self.layers_dict = {} 25 | self.layer_count = 0 26 | 27 | self.output = self._build_network(layers) 28 | 29 | def _build_network(self, layers): 30 | pass 31 | 32 | 33 | class CNN(NeuralNetWork): 34 | # input_shape (features, rows, 
columns) 35 | def __init__(self, feature_number, rows, columns, layers, device): 36 | NeuralNetWork.__init__(self, feature_number, rows, columns, layers, device) 37 | 38 | def add_layer_to_dict(self, layer_type, tensor, weights=True): 39 | 40 | self.layers_dict[layer_type + '_' + str(self.layer_count) + '_activation'] = tensor 41 | self.layer_count += 1 42 | 43 | # grenrate the operation, the forward computaion 44 | def _build_network(self, layers): 45 | network = tf.transpose(self.input_tensor, [0, 2, 3, 1]) 46 | # [batch, assets, window, features] 47 | network = network / network[:, :, -1, 0, None, None] 48 | for layer_number, layer in enumerate(layers): 49 | if layer["type"] == "DenseLayer": 50 | network = tflearn.layers.core.fully_connected(network, 51 | int(layer["neuron_number"]), 52 | layer["activation_function"], 53 | regularizer=layer["regularizer"], 54 | weight_decay=layer["weight_decay"] ) 55 | self.add_layer_to_dict(layer["type"], network) 56 | elif layer["type"] == "DropOut": 57 | network = tflearn.layers.core.dropout(network, layer["keep_probability"]) 58 | elif layer["type"] == "EIIE_Dense": 59 | width = network.get_shape()[2] 60 | network = tflearn.layers.conv_2d(network, int(layer["filter_number"]), 61 | [1, width], 62 | [1, 1], 63 | "valid", 64 | layer["activation_function"], 65 | regularizer=layer["regularizer"], 66 | weight_decay=layer["weight_decay"]) 67 | self.add_layer_to_dict(layer["type"], network) 68 | elif layer["type"] == "ConvLayer": 69 | network = tflearn.layers.conv_2d(network, int(layer["filter_number"]), 70 | allint(layer["filter_shape"]), 71 | allint(layer["strides"]), 72 | layer["padding"], 73 | layer["activation_function"], 74 | regularizer=layer["regularizer"], 75 | weight_decay=layer["weight_decay"]) 76 | self.add_layer_to_dict(layer["type"], network) 77 | elif layer["type"] == "MaxPooling": 78 | network = tflearn.layers.conv.max_pool_2d(network, layer["strides"]) 79 | elif layer["type"] == "AveragePooling": 80 | network = tflearn.layers.conv.avg_pool_2d(network, layer["strides"]) 81 | elif layer["type"] == "LocalResponseNormalization": 82 | network = tflearn.layers.normalization.local_response_normalization(network) 83 | elif layer["type"] == "EIIE_Output": 84 | width = network.get_shape()[2] 85 | network = tflearn.layers.conv_2d(network, 1, [1, width], padding="valid", 86 | regularizer=layer["regularizer"], 87 | weight_decay=layer["weight_decay"]) 88 | self.add_layer_to_dict(layer["type"], network) 89 | network = network[:, :, 0, 0] 90 | btc_bias = tf.ones((self.input_num, 1)) 91 | self.add_layer_to_dict(layer["type"], network) 92 | network = tf.concat([btc_bias, network], 1) 93 | network = tflearn.layers.core.activation(network, activation="softmax") 94 | self.add_layer_to_dict(layer["type"], network, weights=False) 95 | elif layer["type"] == "Output_WithW": 96 | network = tflearn.flatten(network) 97 | network = tf.concat([network,self.previous_w], axis=1) 98 | network = tflearn.fully_connected(network, self._rows+1, 99 | activation="softmax", 100 | regularizer=layer["regularizer"], 101 | weight_decay=layer["weight_decay"]) 102 | elif layer["type"] == "EIIE_Output_WithW": 103 | width = network.get_shape()[2] 104 | height = network.get_shape()[1] 105 | features = network.get_shape()[3] 106 | network = tf.reshape(network, [self.input_num, int(height), 1, int(width*features)]) 107 | w = tf.reshape(self.previous_w, [-1, int(height), 1, 1]) 108 | network = tf.concat([network, w], axis=3) 109 | network = tflearn.layers.conv_2d(network, 1, [1, 1], 
padding="valid", 110 | regularizer=layer["regularizer"], 111 | weight_decay=layer["weight_decay"]) 112 | self.add_layer_to_dict(layer["type"], network) 113 | network = network[:, :, 0, 0] 114 | #btc_bias = tf.zeros((self.input_num, 1)) 115 | btc_bias = tf.get_variable("btc_bias", [1, 1], dtype=tf.float32, 116 | initializer=tf.zeros_initializer) 117 | # self.add_layer_to_dict(layer["type"], network, weights=False) 118 | btc_bias = tf.tile(btc_bias, [self.input_num, 1]) 119 | network = tf.concat([btc_bias, network], 1) 120 | self.voting = network 121 | self.add_layer_to_dict('voting', network, weights=False) 122 | network = tflearn.layers.core.activation(network, activation="softmax") 123 | self.add_layer_to_dict('softmax_layer', network, weights=False) 124 | 125 | elif layer["type"] == "EIIE_LSTM" or\ 126 | layer["type"] == "EIIE_RNN": 127 | network = tf.transpose(network, [0, 2, 3, 1]) 128 | resultlist = [] 129 | reuse = False 130 | for i in range(self._rows): 131 | if i > 0: 132 | reuse = True 133 | if layer["type"] == "EIIE_LSTM": 134 | result = tflearn.layers.lstm(network[:, :, :, i], 135 | int(layer["neuron_number"]), 136 | dropout=layer["dropouts"], 137 | scope="lstm"+str(layer_number), 138 | reuse=reuse) 139 | else: 140 | result = tflearn.layers.simple_rnn(network[:, :, :, i], 141 | int(layer["neuron_number"]), 142 | dropout=layer["dropouts"], 143 | scope="rnn"+str(layer_number), 144 | reuse=reuse) 145 | resultlist.append(result) 146 | network = tf.stack(resultlist) 147 | network = tf.transpose(network, [1, 0, 2]) 148 | network = tf.reshape(network, [-1, self._rows, 1, int(layer["neuron_number"])]) 149 | else: 150 | raise ValueError("the layer {} not supported.".format(layer["type"])) 151 | return network 152 | 153 | 154 | def allint(l): 155 | return [int(i) for i in l] 156 | 157 | -------------------------------------------------------------------------------- /pgportfolio/marketdata/datamatrices.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | import pgportfolio.marketdata.globaldatamatrix as gdm 5 | import numpy as np 6 | import pandas as pd 7 | import logging 8 | from pgportfolio.tools.configprocess import parse_time 9 | from pgportfolio.tools.data import get_volume_forward, get_type_list 10 | import pgportfolio.marketdata.replaybuffer as rb 11 | 12 | MIN_NUM_PERIOD = 3 13 | 14 | 15 | class DataMatrices: 16 | def __init__(self, start, end, period, batch_size=50, volume_average_days=30, buffer_bias_ratio=0, 17 | market="poloniex", coin_filter=1, window_size=50, feature_number=3, test_portion=0.15, 18 | portion_reversed=False, online=False, is_permed=False): 19 | """ 20 | :param start: Unix time 21 | :param end: Unix time 22 | :param access_period: the data access period of the input matrix. 23 | :param trade_period: the trading period of the agent. 24 | :param global_period: the data access period of the global price matrix. 
25 | if it is not equal to the access period, there will be inserted observations 26 | :param coin_filter: number of coins that would be selected 27 | :param window_size: periods of input data 28 | :param train_portion: portion of training set 29 | :param is_permed: if False, the sample inside a mini-batch is in order 30 | :param validation_portion: portion of cross-validation set 31 | :param test_portion: portion of test set 32 | :param portion_reversed: if False, the order to sets are [train, validation, test] 33 | else the order is [test, validation, train] 34 | """ 35 | start = int(start) 36 | self.__end = int(end) 37 | 38 | # assert window_size >= MIN_NUM_PERIOD 39 | self.__coin_no = coin_filter 40 | type_list = get_type_list(feature_number) 41 | self.__features = type_list 42 | self.feature_number = feature_number 43 | volume_forward = get_volume_forward(self.__end-start, test_portion, portion_reversed) 44 | self.__history_manager = gdm.HistoryManager(coin_number=coin_filter, end=self.__end, 45 | volume_average_days=volume_average_days, 46 | volume_forward=volume_forward, online=online) 47 | if market == "poloniex": 48 | self.__global_data = self.__history_manager.get_global_panel(start, 49 | self.__end, 50 | period=period, 51 | features=type_list) 52 | else: 53 | raise ValueError("market {} is not valid".format(market)) 54 | self.__period_length = period 55 | # portfolio vector memory, [time, assets] 56 | self.__PVM = pd.DataFrame(index=self.__global_data.minor_axis, 57 | columns=self.__global_data.major_axis) 58 | self.__PVM = self.__PVM.fillna(1.0 / self.__coin_no) 59 | 60 | self._window_size = window_size 61 | self._num_periods = len(self.__global_data.minor_axis) 62 | self.__divide_data(test_portion, portion_reversed) 63 | 64 | self._portion_reversed = portion_reversed 65 | self.__is_permed = is_permed 66 | 67 | self.__batch_size = batch_size 68 | self.__delta = 0 # the count of global increased 69 | end_index = self._train_ind[-1] 70 | self.__replay_buffer = rb.ReplayBuffer(start_index=self._train_ind[0], 71 | end_index=end_index, 72 | sample_bias=buffer_bias_ratio, 73 | batch_size=self.__batch_size, 74 | coin_number=self.__coin_no, 75 | is_permed=self.__is_permed) 76 | 77 | logging.info("the number of training examples is %s" 78 | ", of test examples is %s" % (self._num_train_samples, self._num_test_samples)) 79 | logging.debug("the training set is from %s to %s" % (min(self._train_ind), max(self._train_ind))) 80 | logging.debug("the test set is from %s to %s" % (min(self._test_ind), max(self._test_ind))) 81 | 82 | @property 83 | def global_weights(self): 84 | return self.__PVM 85 | 86 | @staticmethod 87 | def create_from_config(config): 88 | """main method to create the DataMatrices in this project 89 | @:param config: config dictionary 90 | @:return: a DataMatrices object 91 | """ 92 | config = config.copy() 93 | input_config = config["input"] 94 | train_config = config["training"] 95 | start = parse_time(input_config["start_date"]) 96 | end = parse_time(input_config["end_date"]) 97 | return DataMatrices(start=start, 98 | end=end, 99 | market=input_config["market"], 100 | feature_number=input_config["feature_number"], 101 | window_size=input_config["window_size"], 102 | online=input_config["online"], 103 | period=input_config["global_period"], 104 | coin_filter=input_config["coin_number"], 105 | is_permed=input_config["is_permed"], 106 | buffer_bias_ratio=train_config["buffer_biased"], 107 | batch_size=train_config["batch_size"], 108 | 
volume_average_days=input_config["volume_average_days"], 109 | test_portion=input_config["test_portion"], 110 | portion_reversed=input_config["portion_reversed"], 111 | ) 112 | 113 | @property 114 | def global_matrix(self): 115 | return self.__global_data 116 | 117 | @property 118 | def coin_list(self): 119 | return self.__history_manager.coins 120 | 121 | @property 122 | def num_train_samples(self): 123 | return self._num_train_samples 124 | 125 | @property 126 | def test_indices(self): 127 | return self._test_ind[:-(self._window_size+1):] 128 | 129 | @property 130 | def num_test_samples(self): 131 | return self._num_test_samples 132 | 133 | def append_experience(self, online_w=None): 134 | """ 135 | :param online_w: (number of assets + 1, ) numpy array 136 | Let it be None if in the backtest case. 137 | """ 138 | self.__delta += 1 139 | self._train_ind.append(self._train_ind[-1]+1) 140 | appended_index = self._train_ind[-1] 141 | self.__replay_buffer.append_experience(appended_index) 142 | 143 | def get_test_set(self): 144 | return self.__pack_samples(self.test_indices) 145 | 146 | def get_training_set(self): 147 | return self.__pack_samples(self._train_ind[:-self._window_size]) 148 | 149 | def next_batch(self): 150 | """ 151 | @:return: the next batch of training sample. The sample is a dictionary 152 | with key "X"(input data); "y"(future relative price); "last_w" a numpy array 153 | with shape [batch_size, assets]; "w" a list of numpy arrays list length is 154 | batch_size 155 | """ 156 | batch = self.__pack_samples([exp.state_index for exp in self.__replay_buffer.next_experience_batch()]) 157 | return batch 158 | 159 | def __pack_samples(self, indexs): 160 | indexs = np.array(indexs) 161 | last_w = self.__PVM.values[indexs-1, :] 162 | 163 | def setw(w): 164 | self.__PVM.iloc[indexs, :] = w 165 | M = [self.get_submatrix(index) for index in indexs] 166 | M = np.array(M) 167 | X = M[:, :, :, :-1] 168 | y = M[:, :, :, -1] / M[:, 0, None, :, -2] 169 | return {"X": X, "y": y, "last_w": last_w, "setw": setw} 170 | 171 | # volume in y is the volume in next access period 172 | def get_submatrix(self, ind): 173 | return self.__global_data.values[:, :, ind:ind+self._window_size+1] 174 | 175 | def __divide_data(self, test_portion, portion_reversed): 176 | train_portion = 1 - test_portion 177 | s = float(train_portion + test_portion) 178 | if portion_reversed: 179 | portions = np.array([test_portion]) / s 180 | portion_split = (portions * self._num_periods).astype(int) 181 | indices = np.arange(self._num_periods) 182 | self._test_ind, self._train_ind = np.split(indices, portion_split) 183 | else: 184 | portions = np.array([train_portion]) / s 185 | portion_split = (portions * self._num_periods).astype(int) 186 | indices = np.arange(self._num_periods) 187 | self._train_ind, self._test_ind = np.split(indices, portion_split) 188 | 189 | self._train_ind = self._train_ind[:-(self._window_size + 1)] 190 | # NOTE(zhengyao): change the logic here in order to fit both 191 | # reversed and normal version 192 | self._train_ind = list(self._train_ind) 193 | self._num_train_samples = len(self._train_ind) 194 | self._num_test_samples = len(self.test_indices) 195 | -------------------------------------------------------------------------------- /pgportfolio/learn/nnagent.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | import tflearn 3 | import tensorflow as tf 4 | import numpy as np 5 | from 
pgportfolio.constants import * 6 | import pgportfolio.learn.network as network 7 | 8 | class NNAgent: 9 | def __init__(self, config, restore_dir=None, device="cpu"): 10 | self.__config = config 11 | self.__coin_number = config["input"]["coin_number"] 12 | self.__net = network.CNN(config["input"]["feature_number"], 13 | self.__coin_number, 14 | config["input"]["window_size"], 15 | config["layers"], 16 | device=device) 17 | self.__global_step = tf.Variable(0, trainable=False) 18 | self.__train_operation = None 19 | self.__y = tf.placeholder(tf.float32, shape=[None, 20 | self.__config["input"]["feature_number"], 21 | self.__coin_number]) 22 | self.__future_price = tf.concat([tf.ones([self.__net.input_num, 1]), 23 | self.__y[:, 0, :]], 1) 24 | self.__future_omega = (self.__future_price * self.__net.output) /\ 25 | tf.reduce_sum(self.__future_price * self.__net.output, axis=1)[:, None] 26 | # tf.assert_equal(tf.reduce_sum(self.__future_omega, axis=1), tf.constant(1.0)) 27 | self.__commission_ratio = self.__config["trading"]["trading_consumption"] 28 | self.__pv_vector = tf.reduce_sum(self.__net.output * self.__future_price, reduction_indices=[1]) *\ 29 | (tf.concat([tf.ones(1), self.__pure_pc()], axis=0)) 30 | self.__log_mean_free = tf.reduce_mean(tf.log(tf.reduce_sum(self.__net.output * self.__future_price, 31 | reduction_indices=[1]))) 32 | self.__portfolio_value = tf.reduce_prod(self.__pv_vector) 33 | self.__mean = tf.reduce_mean(self.__pv_vector) 34 | self.__log_mean = tf.reduce_mean(tf.log(self.__pv_vector)) 35 | self.__standard_deviation = tf.sqrt(tf.reduce_mean((self.__pv_vector - self.__mean) ** 2)) 36 | self.__sharp_ratio = (self.__mean - 1) / self.__standard_deviation 37 | self.__loss = self.__set_loss_function() 38 | self.__train_operation = self.init_train(learning_rate=self.__config["training"]["learning_rate"], 39 | decay_steps=self.__config["training"]["decay_steps"], 40 | decay_rate=self.__config["training"]["decay_rate"], 41 | training_method=self.__config["training"]["training_method"]) 42 | self.__saver = tf.train.Saver() 43 | if restore_dir: 44 | self.__saver.restore(self.__net.session, restore_dir) 45 | else: 46 | self.__net.session.run(tf.global_variables_initializer()) 47 | 48 | @property 49 | def session(self): 50 | return self.__net.session 51 | 52 | @property 53 | def pv_vector(self): 54 | return self.__pv_vector 55 | 56 | @property 57 | def standard_deviation(self): 58 | return self.__standard_deviation 59 | 60 | @property 61 | def portfolio_weights(self): 62 | return self.__net.output 63 | 64 | @property 65 | def sharp_ratio(self): 66 | return self.__sharp_ratio 67 | 68 | @property 69 | def log_mean(self): 70 | return self.__log_mean 71 | 72 | @property 73 | def log_mean_free(self): 74 | return self.__log_mean_free 75 | 76 | @property 77 | def portfolio_value(self): 78 | return self.__portfolio_value 79 | 80 | @property 81 | def loss(self): 82 | return self.__loss 83 | 84 | @property 85 | def layers_dict(self): 86 | return self.__net.layers_dict 87 | 88 | def recycle(self): 89 | tf.reset_default_graph() 90 | self.__net.session.close() 91 | 92 | def __set_loss_function(self): 93 | def loss_function4(): 94 | return -tf.reduce_mean(tf.log(tf.reduce_sum(self.__net.output[:] * self.__future_price, 95 | reduction_indices=[1]))) 96 | 97 | def loss_function5(): 98 | return -tf.reduce_mean(tf.log(tf.reduce_sum(self.__net.output * self.__future_price, reduction_indices=[1]))) + \ 99 | LAMBDA * tf.reduce_mean(tf.reduce_sum(-tf.log(1 + 1e-6 - self.__net.output), 
reduction_indices=[1])) 100 | 101 | def loss_function6(): 102 | return -tf.reduce_mean(tf.log(self.pv_vector)) 103 | 104 | def loss_function7(): 105 | return -tf.reduce_mean(tf.log(self.pv_vector)) + \ 106 | LAMBDA * tf.reduce_mean(tf.reduce_sum(-tf.log(1 + 1e-6 - self.__net.output), reduction_indices=[1])) 107 | 108 | def with_last_w(): 109 | return -tf.reduce_mean(tf.log(tf.reduce_sum(self.__net.output[:] * self.__future_price, reduction_indices=[1]) 110 | -tf.reduce_sum(tf.abs(self.__net.output[:, 1:] - self.__net.previous_w) 111 | *self.__commission_ratio, reduction_indices=[1]))) 112 | 113 | loss_function = loss_function5 114 | if self.__config["training"]["loss_function"] == "loss_function4": 115 | loss_function = loss_function4 116 | elif self.__config["training"]["loss_function"] == "loss_function5": 117 | loss_function = loss_function5 118 | elif self.__config["training"]["loss_function"] == "loss_function6": 119 | loss_function = loss_function6 120 | elif self.__config["training"]["loss_function"] == "loss_function7": 121 | loss_function = loss_function7 122 | elif self.__config["training"]["loss_function"] == "loss_function8": 123 | loss_function = with_last_w 124 | 125 | loss_tensor = loss_function() 126 | regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) 127 | if regularization_losses: 128 | for regularization_loss in regularization_losses: 129 | loss_tensor += regularization_loss 130 | return loss_tensor 131 | 132 | def init_train(self, learning_rate, decay_steps, decay_rate, training_method): 133 | learning_rate = tf.train.exponential_decay(learning_rate, self.__global_step, 134 | decay_steps, decay_rate, staircase=True) 135 | if training_method == 'GradientDescent': 136 | train_step = tf.train.GradientDescentOptimizer(learning_rate).\ 137 | minimize(self.__loss, global_step=self.__global_step) 138 | elif training_method == 'Adam': 139 | train_step = tf.train.AdamOptimizer(learning_rate).\ 140 | minimize(self.__loss, global_step=self.__global_step) 141 | elif training_method == 'RMSProp': 142 | train_step = tf.train.RMSPropOptimizer(learning_rate).\ 143 | minimize(self.__loss, global_step=self.__global_step) 144 | else: 145 | raise ValueError() 146 | return train_step 147 | 148 | def train(self, x, y, last_w, setw): 149 | tflearn.is_training(True, self.__net.session) 150 | self.evaluate_tensors(x, y, last_w, setw, [self.__train_operation]) 151 | 152 | def evaluate_tensors(self, x, y, last_w, setw, tensors): 153 | """ 154 | :param x: 155 | :param y: 156 | :param last_w: 157 | :param setw: a function, pass the output w to it to fill the PVM 158 | :param tensors: 159 | :return: 160 | """ 161 | tensors = list(tensors) 162 | tensors.append(self.__net.output) 163 | assert not np.any(np.isnan(x)) 164 | assert not np.any(np.isnan(y)) 165 | assert not np.any(np.isnan(last_w)),\ 166 | "the last_w is {}".format(last_w) 167 | results = self.__net.session.run(tensors, 168 | feed_dict={self.__net.input_tensor: x, 169 | self.__y: y, 170 | self.__net.previous_w: last_w, 171 | self.__net.input_num: x.shape[0]}) 172 | setw(results[-1][:, 1:]) 173 | return results[:-1] 174 | 175 | # save the variables path including file name 176 | def save_model(self, path): 177 | self.__saver.save(self.__net.session, path) 178 | 179 | # consumption vector (on each periods) 180 | def __pure_pc(self): 181 | c = self.__commission_ratio 182 | w_t = self.__future_omega[:self.__net.input_num-1] # rebalanced 183 | w_t1 = self.__net.output[1:self.__net.input_num] 184 | mu = 1 - 
tf.reduce_sum(tf.abs(w_t1[:, 1:]-w_t[:, 1:]), axis=1)*c 185 | """ 186 | mu = 1-3*c+c**2 187 | 188 | def recurse(mu0): 189 | factor1 = 1/(1 - c*w_t1[:, 0]) 190 | if isinstance(mu0, float): 191 | mu0 = mu0 192 | else: 193 | mu0 = mu0[:, None] 194 | factor2 = 1 - c*w_t[:, 0] - (2*c - c**2)*tf.reduce_sum( 195 | tf.nn.relu(w_t[:, 1:] - mu0 * w_t1[:, 1:]), axis=1) 196 | return factor1*factor2 197 | 198 | for i in range(20): 199 | mu = recurse(mu) 200 | """ 201 | return mu 202 | 203 | # the history is a 3d matrix, return a asset vector 204 | def decide_by_history(self, history, last_w): 205 | assert isinstance(history, np.ndarray),\ 206 | "the history should be a numpy array, not %s" % type(history) 207 | assert not np.any(np.isnan(last_w)) 208 | assert not np.any(np.isnan(history)) 209 | tflearn.is_training(False, self.session) 210 | history = history[np.newaxis, :, :, :] 211 | return np.squeeze(self.session.run(self.__net.output, feed_dict={self.__net.input_tensor: history, 212 | self.__net.previous_w: last_w[np.newaxis, 1:], 213 | self.__net.input_num: 1})) 214 | -------------------------------------------------------------------------------- /pgportfolio/learn/tradertrainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import json 7 | import os 8 | import time 9 | import collections 10 | import tflearn 11 | import numpy as np 12 | import pandas as pd 13 | import tensorflow as tf 14 | from pgportfolio.learn.nnagent import NNAgent 15 | from pgportfolio.marketdata.datamatrices import DataMatrices 16 | import logging 17 | Result = collections.namedtuple("Result", 18 | [ 19 | "test_pv", 20 | "test_log_mean", 21 | "test_log_mean_free", 22 | "test_history", 23 | "config", 24 | "net_dir", 25 | "backtest_test_pv", 26 | "backtest_test_history", 27 | "backtest_test_log_mean", 28 | "training_time"]) 29 | 30 | class TraderTrainer: 31 | def __init__(self, config, fake_data=False, restore_dir=None, save_path=None, device="cpu", 32 | agent=None): 33 | """ 34 | :param config: config dictionary 35 | :param fake_data: if True will use data generated randomly 36 | :param restore_dir: path to the model trained before 37 | :param save_path: path to save the model 38 | :param device: the device used to train the network 39 | :param agent: the nnagent object. If this is provides, the trainer will not 40 | create a new agent by itself. Therefore the restore_dir will not affect anything. 
41 | """ 42 | self.config = config 43 | self.train_config = config["training"] 44 | self.input_config = config["input"] 45 | self.save_path = save_path 46 | self.best_metric = 0 47 | np.random.seed(config["random_seed"]) 48 | 49 | self.__window_size = self.input_config["window_size"] 50 | self.__coin_number = self.input_config["coin_number"] 51 | self.__batch_size = self.train_config["batch_size"] 52 | self.__snap_shot = self.train_config["snap_shot"] 53 | config["input"]["fake_data"] = fake_data 54 | 55 | self._matrix = DataMatrices.create_from_config(config) 56 | 57 | self.test_set = self._matrix.get_test_set() 58 | if not config["training"]["fast_train"]: 59 | self.training_set = self._matrix.get_training_set() 60 | self.upperbound_validation = 1 61 | self.upperbound_test = 1 62 | tf.set_random_seed(self.config["random_seed"]) 63 | self.device = device 64 | if agent: 65 | self._agent = agent 66 | else: 67 | if device == "cpu": 68 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 69 | with tf.device("/cpu:0"): 70 | self._agent = NNAgent(config, restore_dir, device) 71 | else: 72 | self._agent = NNAgent(config, restore_dir, device) 73 | 74 | def _evaluate(self, set_name, *tensors): 75 | if set_name == "test": 76 | feed = self.test_set 77 | elif set_name == "training": 78 | feed = self.training_set 79 | else: 80 | raise ValueError() 81 | result = self._agent.evaluate_tensors(feed["X"],feed["y"],last_w=feed["last_w"], 82 | setw=feed["setw"], tensors=tensors) 83 | return result 84 | 85 | @staticmethod 86 | def calculate_upperbound(y): 87 | array = np.maximum.reduce(y[:, 0, :], 1) 88 | total = 1.0 89 | for i in array: 90 | total = total * i 91 | return total 92 | 93 | def log_between_steps(self, step): 94 | fast_train = self.train_config["fast_train"] 95 | tflearn.is_training(False, self._agent.session) 96 | 97 | summary, v_pv, v_log_mean, v_loss, log_mean_free, weights= \ 98 | self._evaluate("test", self.summary, 99 | self._agent.portfolio_value, 100 | self._agent.log_mean, 101 | self._agent.loss, 102 | self._agent.log_mean_free, 103 | self._agent.portfolio_weights) 104 | self.test_writer.add_summary(summary, step) 105 | 106 | if not fast_train: 107 | summary, loss_value = self._evaluate("training", self.summary, self._agent.loss) 108 | self.train_writer.add_summary(summary, step) 109 | 110 | # print 'ouput is %s' % out 111 | logging.info('='*30) 112 | logging.info('step %d' % step) 113 | logging.info('-'*30) 114 | if not fast_train: 115 | logging.info('training loss is %s\n' % loss_value) 116 | logging.info('the portfolio value on test set is %s\nlog_mean is %s\n' 117 | 'loss_value is %3f\nlog mean without commission fee is %3f\n' % \ 118 | (v_pv, v_log_mean, v_loss, log_mean_free)) 119 | logging.info('='*30+"\n") 120 | 121 | if not self.__snap_shot: 122 | self._agent.save_model(self.save_path) 123 | elif v_pv > self.best_metric: 124 | self.best_metric = v_pv 125 | logging.info("get better model at %s steps," 126 | " whose test portfolio value is %s" % (step, v_pv)) 127 | if self.save_path: 128 | self._agent.save_model(self.save_path) 129 | self.check_abnormal(v_pv, weights) 130 | 131 | def check_abnormal(self, portfolio_value, weigths): 132 | if portfolio_value == 1.0: 133 | logging.info("average portfolio weights {}".format(weigths.mean(axis=0))) 134 | 135 | 136 | def next_batch(self): 137 | batch = self._matrix.next_batch() 138 | batch_input = batch["X"] 139 | batch_y = batch["y"] 140 | batch_last_w = batch["last_w"] 141 | batch_w = batch["setw"] 142 | return batch_input, batch_y, batch_last_w, 
batch_w 143 | 144 | def __init_tensor_board(self, log_file_dir): 145 | tf.summary.scalar('benefit', self._agent.portfolio_value) 146 | tf.summary.scalar('log_mean', self._agent.log_mean) 147 | tf.summary.scalar('loss', self._agent.loss) 148 | tf.summary.scalar("log_mean_free", self._agent.log_mean_free) 149 | for layer_key in self._agent.layers_dict: 150 | tf.summary.histogram(layer_key, self._agent.layers_dict[layer_key]) 151 | for var in tf.trainable_variables(): 152 | tf.summary.histogram(var.name, var) 153 | grads = tf.gradients(self._agent.loss, tf.trainable_variables()) 154 | for grad in grads: 155 | tf.summary.histogram(grad.name + '/gradient', grad) 156 | self.summary = tf.summary.merge_all() 157 | location = log_file_dir 158 | self.network_writer = tf.summary.FileWriter(location + '/network', 159 | self._agent.session.graph) 160 | self.test_writer = tf.summary.FileWriter(location + '/test') 161 | self.train_writer = tf.summary.FileWriter(location + '/train') 162 | 163 | def __print_upperbound(self): 164 | upperbound_test = self.calculate_upperbound(self.test_set["y"]) 165 | logging.info("upper bound in test is %s" % upperbound_test) 166 | 167 | def train_net(self, log_file_dir="./tensorboard", index="0"): 168 | """ 169 | :param log_file_dir: logging of the training process 170 | :param index: sub-folder name under train_package 171 | :return: the result named tuple 172 | """ 173 | self.__print_upperbound() 174 | if log_file_dir: 175 | if self.device == "cpu": 176 | with tf.device("/cpu:0"): 177 | self.__init_tensor_board(log_file_dir) 178 | else: 179 | self.__init_tensor_board(log_file_dir) 180 | starttime = time.time() 181 | 182 | total_data_time = 0 183 | total_training_time = 0 184 | for i in range(self.train_config["steps"]): 185 | step_start = time.time() 186 | x, y, last_w, setw = self.next_batch() 187 | finish_data = time.time() 188 | total_data_time += (finish_data - step_start) 189 | self._agent.train(x, y, last_w=last_w, setw=setw) 190 | total_training_time += time.time() - finish_data 191 | if i % 1000 == 0 and log_file_dir: 192 | logging.info("average time for data accessing is %s"%(total_data_time/1000)) 193 | logging.info("average time for training is %s"%(total_training_time/1000)) 194 | total_training_time = 0 195 | total_data_time = 0 196 | self.log_between_steps(i) 197 | 198 | if self.save_path: 199 | self._agent.recycle() 200 | best_agent = NNAgent(self.config, restore_dir=self.save_path) 201 | self._agent = best_agent 202 | 203 | pv, log_mean = self._evaluate("test", self._agent.portfolio_value, self._agent.log_mean) 204 | logging.warning('the portfolio value train No.%s is %s log_mean is %s,' 205 | ' the training time is %d seconds' % (index, pv, log_mean, time.time() - starttime)) 206 | 207 | return self.__log_result_csv(index, time.time() - starttime) 208 | 209 | def __log_result_csv(self, index, time): 210 | from pgportfolio.trade import backtest 211 | dataframe = None 212 | csv_dir = './train_package/train_summary.csv' 213 | tflearn.is_training(False, self._agent.session) 214 | v_pv, v_log_mean, benefit_array, v_log_mean_free =\ 215 | self._evaluate("test", 216 | self._agent.portfolio_value, 217 | self._agent.log_mean, 218 | self._agent.pv_vector, 219 | self._agent.log_mean_free) 220 | 221 | backtest = backtest.BackTest(self.config.copy(), 222 | net_dir=None, 223 | agent=self._agent) 224 | 225 | backtest.start_trading() 226 | result = Result(test_pv=[v_pv], 227 | test_log_mean=[v_log_mean], 228 | test_log_mean_free=[v_log_mean_free], 229 | 
test_history=[''.join(str(e)+', ' for e in benefit_array)], 230 | config=[json.dumps(self.config)], 231 | net_dir=[index], 232 | backtest_test_pv=[backtest.test_pv], 233 | backtest_test_history=[''.join(str(e)+', ' for e in backtest.test_pc_vector)], 234 | backtest_test_log_mean=[np.mean(np.log(backtest.test_pc_vector))], 235 | training_time=int(time)) 236 | new_data_frame = pd.DataFrame(result._asdict()).set_index("net_dir") 237 | if os.path.isfile(csv_dir): 238 | dataframe = pd.read_csv(csv_dir).set_index("net_dir") 239 | dataframe = dataframe.append(new_data_frame) 240 | else: 241 | dataframe = new_data_frame 242 | if int(index) > 0: 243 | dataframe.to_csv(csv_dir) 244 | return result 245 | 246 | -------------------------------------------------------------------------------- /pgportfolio/marketdata/globaldatamatrix.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | 5 | from pgportfolio.marketdata.coinlist import CoinList 6 | import numpy as np 7 | import pandas as pd 8 | from pgportfolio.tools.data import panel_fillna 9 | from pgportfolio.constants import * 10 | import sqlite3 11 | from datetime import datetime 12 | import logging 13 | 14 | 15 | class HistoryManager: 16 | # if offline ,the coin_list could be None 17 | # NOTE: return of the sqlite results is a list of tuples, each tuple is a row 18 | def __init__(self, coin_number, end, volume_average_days=1, volume_forward=0, online=True): 19 | self.initialize_db() 20 | self.__storage_period = FIVE_MINUTES # keep this as 300 21 | self._coin_number = coin_number 22 | self._online = online 23 | if self._online: 24 | self._coin_list = CoinList(end, volume_average_days, volume_forward) 25 | self.__volume_forward = volume_forward 26 | self.__volume_average_days = volume_average_days 27 | self.__coins = None 28 | 29 | @property 30 | def coins(self): 31 | return self.__coins 32 | 33 | def initialize_db(self): 34 | with sqlite3.connect(DATABASE_DIR) as connection: 35 | cursor = connection.cursor() 36 | cursor.execute('CREATE TABLE IF NOT EXISTS History (date INTEGER,' 37 | ' coin varchar(20), high FLOAT, low FLOAT,' 38 | ' open FLOAT, close FLOAT, volume FLOAT, ' 39 | ' quoteVolume FLOAT, weightedAverage FLOAT,' 40 | 'PRIMARY KEY (date, coin));') 41 | connection.commit() 42 | 43 | def get_global_data_matrix(self, start, end, period=300, features=('close',)): 44 | """ 45 | :return a numpy ndarray whose axis is [feature, coin, time] 46 | """ 47 | return self.get_global_panel(start, end, period, features).values 48 | 49 | def get_global_panel(self, start, end, period=300, features=('close',)): 50 | """ 51 | :param start/end: linux timestamp in seconds 52 | :param period: time interval of each data access point 53 | :param features: tuple or list of the feature names 54 | :return a panel, [feature, coin, time] 55 | """ 56 | start = int(start - (start%period)) 57 | end = int(end - (end%period)) 58 | coins = self.select_coins(start=end - self.__volume_forward - self.__volume_average_days * DAY, 59 | end=end-self.__volume_forward) 60 | self.__coins = coins 61 | for coin in coins: 62 | self.update_data(start, end, coin) 63 | 64 | if len(coins)!=self._coin_number: 65 | raise ValueError("the length of selected coins %d is not equal to expected %d" 66 | % (len(coins), self._coin_number)) 67 | 68 | logging.info("feature type list is %s" % str(features)) 69 | self.__checkperiod(period) 70 | 71 | 
time_index = pd.to_datetime(list(range(start, end+1, period)),unit='s') 72 | panel = pd.Panel(items=features, major_axis=coins, minor_axis=time_index, dtype=np.float32) 73 | 74 | connection = sqlite3.connect(DATABASE_DIR) 75 | try: 76 | for row_number, coin in enumerate(coins): 77 | for feature in features: 78 | # NOTE: transform the start date to end date 79 | if feature == "close": 80 | sql = ("SELECT date+300 AS date_norm, close FROM History WHERE" 81 | " date_norm>={start} and date_norm<={end}" 82 | " and date_norm%{period}=0 and coin=\"{coin}\"".format( 83 | start=start, end=end, period=period, coin=coin)) 84 | elif feature == "open": 85 | sql = ("SELECT date+{period} AS date_norm, open FROM History WHERE" 86 | " date_norm>={start} and date_norm<={end}" 87 | " and date_norm%{period}=0 and coin=\"{coin}\"".format( 88 | start=start, end=end, period=period, coin=coin)) 89 | elif feature == "volume": 90 | sql = ("SELECT date_norm, SUM(volume)"+ 91 | " FROM (SELECT date+{period}-(date%{period}) " 92 | "AS date_norm, volume, coin FROM History)" 93 | " WHERE date_norm>={start} and date_norm<={end} and coin=\"{coin}\"" 94 | " GROUP BY date_norm".format( 95 | period=period,start=start,end=end,coin=coin)) 96 | elif feature == "high": 97 | sql = ("SELECT date_norm, MAX(high)" + 98 | " FROM (SELECT date+{period}-(date%{period})" 99 | " AS date_norm, high, coin FROM History)" 100 | " WHERE date_norm>={start} and date_norm<={end} and coin=\"{coin}\"" 101 | " GROUP BY date_norm".format( 102 | period=period,start=start,end=end,coin=coin)) 103 | elif feature == "low": 104 | sql = ("SELECT date_norm, MIN(low)" + 105 | " FROM (SELECT date+{period}-(date%{period})" 106 | " AS date_norm, low, coin FROM History)" 107 | " WHERE date_norm>={start} and date_norm<={end} and coin=\"{coin}\"" 108 | " GROUP BY date_norm".format( 109 | period=period,start=start,end=end,coin=coin)) 110 | else: 111 | msg = ("The feature %s is not supported" % feature) 112 | logging.error(msg) 113 | raise ValueError(msg) 114 | serial_data = pd.read_sql_query(sql, con=connection, 115 | parse_dates=["date_norm"], 116 | index_col="date_norm") 117 | panel.loc[feature, coin, serial_data.index] = serial_data.squeeze() 118 | panel = panel_fillna(panel, "both") 119 | finally: 120 | connection.commit() 121 | connection.close() 122 | return panel 123 | 124 | # select top coin_number of coins by volume from start to end 125 | def select_coins(self, start, end): 126 | if not self._online: 127 | logging.info("select coins offline from %s to %s" % (datetime.fromtimestamp(start).strftime('%Y-%m-%d %H:%M'), 128 | datetime.fromtimestamp(end).strftime('%Y-%m-%d %H:%M'))) 129 | connection = sqlite3.connect(DATABASE_DIR) 130 | try: 131 | cursor=connection.cursor() 132 | cursor.execute('SELECT coin,SUM(volume) AS total_volume FROM History WHERE' 133 | ' date>=? and date<=? 
GROUP BY coin'
134 |                                ' ORDER BY total_volume DESC LIMIT ?;',
135 |                                (int(start), int(end), self._coin_number))
136 |                 coins_tuples = cursor.fetchall()
137 | 
138 |                 if len(coins_tuples)!=self._coin_number:
139 |                     logging.error("the sqlite error happened")
140 |             finally:
141 |                 connection.commit()
142 |                 connection.close()
143 |             coins = []
144 |             for tuple in coins_tuples:
145 |                 coins.append(tuple[0])
146 |         else:
147 |             coins = list(self._coin_list.topNVolume(n=self._coin_number).index)
148 |         logging.debug("Selected coins are: "+str(coins))
149 |         return coins
150 | 
151 |     def __checkperiod(self, period):
152 |         if period == FIVE_MINUTES:
153 |             return
154 |         elif period == FIFTEEN_MINUTES:
155 |             return
156 |         elif period == HALF_HOUR:
157 |             return
158 |         elif period == TWO_HOUR:
159 |             return
160 |         elif period == FOUR_HOUR:
161 |             return
162 |         elif period == DAY:
163 |             return
164 |         else:
165 |             raise ValueError('period has to be 5min, 15min, 30min, 2hr, 4hr, or a day')
166 | 
167 |     # add new history data into the database
168 |     def update_data(self, start, end, coin):
169 |         connection = sqlite3.connect(DATABASE_DIR)
170 |         try:
171 |             cursor = connection.cursor()
172 |             min_date = cursor.execute('SELECT MIN(date) FROM History WHERE coin=?;', (coin,)).fetchall()[0][0]
173 |             max_date = cursor.execute('SELECT MAX(date) FROM History WHERE coin=?;', (coin,)).fetchall()[0][0]
174 | 
175 |             if min_date==None or max_date==None:
176 |                 self.__fill_data(start, end, coin, cursor)
177 |             else:
178 |                 if max_date+10*self.__storage_period<end:
179 |                     if not self._online:
180 |                         raise Exception("Have to be online")
181 |                     self.__fill_data(max_date + self.__storage_period, end, coin, cursor)
182 |                 if min_date>start and self._online:
183 |                     self.__fill_data(start, min_date - self.__storage_period-1, coin, cursor)
184 | 
185 |             # if there is no data
186 |         finally:
187 |             connection.commit()
188 |             connection.close()
189 | 
190 |     def __fill_data(self, start, end, coin, cursor):
191 |         duration = 7819200  # three months
192 |         bk_start = start
193 |         for bk_end in range(start+duration-1, end, duration):
194 |             self.__fill_part_data(bk_start, bk_end, coin, cursor)
195 |             bk_start += duration
196 |         if bk_start < end:
197 |             self.__fill_part_data(bk_start, end, coin, cursor)
198 | 
199 |     def __fill_part_data(self, start, end, coin, cursor):
200 |         chart = self._coin_list.get_chart_until_success(
201 |             pair=self._coin_list.allActiveCoins.at[coin, 'pair'],
202 |             start=start,
203 |             end=end,
204 |             period=self.__storage_period)
205 |         logging.info("fill %s data from %s to %s"%(coin, datetime.fromtimestamp(start).strftime('%Y-%m-%d %H:%M'),
206 |                                                    datetime.fromtimestamp(end).strftime('%Y-%m-%d %H:%M')))
207 |         for c in chart:
208 |             if c["date"] > 0:
209 |                 if c['weightedAverage'] == 0:
210 |                     weightedAverage = c['close']
211 |                 else:
212 |                     weightedAverage = c['weightedAverage']
213 | 
214 |                 # NOTE here the USDT is in reversed order
215 |                 if 'reversed_' in coin:
216 |                     cursor.execute('INSERT INTO History VALUES (?,?,?,?,?,?,?,?,?)',
217 |                         (c['date'],coin,1.0/c['low'],1.0/c['high'],1.0/c['open'],
218 |                          1.0/c['close'],c['quoteVolume'],c['volume'],
219 |                          1.0/weightedAverage))
220 |                 else:
221 |                     cursor.execute('INSERT INTO History VALUES (?,?,?,?,?,?,?,?,?)',
222 |                         (c['date'],coin,c['high'],c['low'],c['open'],
223 |                          c['close'],c['volume'],c['quoteVolume'],
224 |                          weightedAverage))
225 | 
--------------------------------------------------------------------------------
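A side note on the SQL in `get_global_panel` above: the volume/high/low queries group the raw 5-minute rows by `date+{period}-(date%{period})`, i.e. each row is labelled with the end timestamp of the period bucket it falls into. The snippet below is only a standalone illustration of that arithmetic, using made-up timestamps.

```
# Standalone illustration of the date_norm bucketing used in the SQL queries above.
HALF_HOUR = 1800  # an allowed period (a multiple of the 300-second storage period)

def date_norm(date, period=HALF_HOUR):
    # same expression as in the SQL: date + {period} - (date % {period})
    return date + period - (date % period)

for date in (3300, 3600, 3900):
    print(date, "->", date_norm(date))  # 3300 -> 3600, 3600 -> 5400, 3900 -> 5400
```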