├── DL_moudle
│   ├── RL_moudle
│   │   ├── CWRNN_DDQN
│   │   │   ├── Data_Process.py
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── Data_Process.cpython-35.pyc
│   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   ├── model.cpython-35.pyc
│   │   │   │   └── trade_env.cpython-35.pyc
│   │   │   ├── model.py
│   │   │   └── trade_env.py
│   │   ├── __init__.py
│   │   └── __pycache__
│   │       └── __init__.cpython-35.pyc
│   ├── __init__.py
│   └── __pycache__
│       └── __init__.cpython-35.pyc
├── LICENSE
└── README.md

/DL_moudle/RL_moudle/CWRNN_DDQN/Data_Process.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Feb 22 15:36:17 2019
4 | 
5 | @author: fgw
6 | """
7 | 
8 | import pandas as pd
9 | 
10 | from collections import deque
11 | from sklearn.preprocessing import MinMaxScaler
12 | 
13 | class Data_Process(object):
14 |     def __init__(self, data_path):
15 |         data = pd.read_csv(data_path, sep=',')
16 |         data['Date'] = pd.to_datetime(data['Date'])
17 |         data = data.set_index('Date')
18 | 
19 |         TWSE_data = data[['Open', 'High', 'Low', 'Close', 'Futures spread', 'Jump spread']]
20 | 
21 |         Observe_data = TWSE_data.copy()
22 |         Observe_data['High'] = Observe_data['High'].shift(1)
23 |         Observe_data['Low'] = Observe_data['Low'].shift(1)
24 |         Observe_data['Close'] = Observe_data['Close'].shift(1)
25 | 
26 |         price_data = data[['Open_futures', 'Close_futures']].copy()  # copy so renaming the columns does not trigger SettingWithCopyWarning
27 |         price_data.columns = ['Open', 'Close']
28 | 
29 |         self.N_price_data = price_data
30 |         self.N_Observe_data = Observe_data
31 | 
32 |     def spilt_to_train_test(self, date_split, date_end, data_start = '2006-01-01'):
33 | 
34 |         train_data = self.N_price_data[data_start:date_split]
35 |         test_data = self.N_price_data[date_split:date_end]
36 | 
37 |         train_data_Ob = self.N_Observe_data[data_start:date_split]
38 |         test_data_Ob = self.N_Observe_data[date_split:date_end]
39 | 
40 |         ##### train
41 | 
42 |         train_data_Ob_OHLC = train_data_Ob[['Open', 'High', 'Low', 'Close']]
43 |         train_data_Ob_spread = train_data_Ob[['Futures spread', 'Jump spread']]
44 | 
45 |         spread_scaler_train = MinMaxScaler(feature_range=(-1,1))
46 | 
47 |         Ntrain_data_Ob_spread = spread_scaler_train.fit_transform(train_data_Ob_spread)
48 |         Ntrain_data_Ob_spread = pd.DataFrame(Ntrain_data_Ob_spread)
49 |         Ntrain_data_Ob_spread.index = train_data_Ob_spread.index
50 |         Ntrain_data_Ob_spread.columns = train_data_Ob_spread.columns
51 | 
52 |         Size = -23
53 | 
54 |         scalar_data = self.N_Observe_data[:data_start].iloc[Size:].values[:,:4]
55 | 
56 |         train_scalar_data = deque(maxlen=len(scalar_data))
57 |         for i in range(len(scalar_data)):
58 |             train_scalar_data.append(scalar_data[i])
59 | 
60 |         train_dic={}
61 |         scaler_train = MinMaxScaler(feature_range=(-1,1))
62 | 
63 |         for i in range(len(train_data_Ob)):
64 |             scaler_train.fit(train_scalar_data)
65 |             train_dic[train_data_Ob_OHLC.index[i]] = scaler_train.transform(train_data_Ob_OHLC.iloc[i].values.reshape(1, -1)).reshape(-1)
66 |             train_scalar_data.append(train_data_Ob_OHLC.iloc[i].values.reshape(-1))
67 | 
68 |         Ntrain_data_OHLC = pd.DataFrame(train_dic).transpose()
69 |         Ntrain_data_OHLC.columns = train_data_Ob_OHLC.columns
70 | 
71 |         Ntrain_data = Ntrain_data_OHLC.join(Ntrain_data_Ob_spread)
72 | 
73 |         ##### test
74 | 
75 |         test_data_Ob_OHLC = test_data_Ob[['Open', 'High', 'Low', 'Close']]
76 |         test_data_Ob_spread = test_data_Ob[['Futures spread', 'Jump spread']]
77 | 
78 |         Ntest_data_Ob_spread = spread_scaler_train.transform(test_data_Ob_spread)
79 |         Ntest_data_Ob_spread = pd.DataFrame(Ntest_data_Ob_spread)
80 |         Ntest_data_Ob_spread.index =
test_data_Ob_spread.index 81 | Ntest_data_Ob_spread.columns = test_data_Ob_spread.columns 82 | 83 | scalar_data = train_data_Ob.iloc[Size:].values [:,:4] 84 | 85 | test_scalar_data = deque(maxlen=len(scalar_data)) 86 | for i in range(len(scalar_data)): 87 | test_scalar_data.append(scalar_data[i]) 88 | 89 | test_dic={} 90 | scaler_test = MinMaxScaler(feature_range=(-1,1)) 91 | 92 | for i in range(len(test_data_Ob)): 93 | scaler_test.fit(test_scalar_data) 94 | test_dic[test_data_Ob_OHLC.index[i]] = scaler_test.transform(test_data_Ob_OHLC.iloc[i].values.reshape(1, -1)).reshape(-1) 95 | test_scalar_data.append(test_data_Ob_OHLC.iloc[i].values.reshape(-1)) 96 | 97 | Ntest_data_OHLC = pd.DataFrame(test_dic).transpose() 98 | Ntest_data_OHLC.columns = test_data_Ob_OHLC.columns 99 | 100 | Ntest_data = Ntest_data_OHLC.join(Ntest_data_Ob_spread) 101 | 102 | return train_data, Ntrain_data, test_data, Ntest_data 103 | 104 | def spilt_to_train_val_test(self, date_val_split, date_test_split, date_end, data_start = '2006-01-01'): 105 | 106 | train_data = self.N_price_data[data_start:date_val_split] 107 | val_data = self.N_price_data[date_val_split:date_test_split] 108 | test_data = self.N_price_data[date_test_split:date_end] 109 | 110 | train_data_Ob = self.N_Observe_data[data_start:date_val_split] 111 | val_data_Ob = self.N_Observe_data[date_val_split:date_test_split] 112 | test_data_Ob = self.N_Observe_data[date_test_split:date_end] 113 | 114 | ##### train 115 | 116 | train_data_Ob_OHLC = train_data_Ob[['Open', 'High', 'Low', 'Close']] 117 | train_data_Ob_spread = train_data_Ob[['Futures spread', 'Jump spread']] 118 | 119 | spread_scaler_train = MinMaxScaler(feature_range=(-1,1)) 120 | 121 | Ntrain_data_Ob_spread = spread_scaler_train.fit_transform(train_data_Ob_spread) 122 | Ntrain_data_Ob_spread = pd.DataFrame(Ntrain_data_Ob_spread) 123 | Ntrain_data_Ob_spread.index = train_data_Ob_spread.index 124 | Ntrain_data_Ob_spread.columns = train_data_Ob_spread.columns 125 | 126 | Size = -23 127 | 128 | scalar_data = self.N_Observe_data[:data_start].iloc[Size:].values[:,:4] 129 | 130 | train_scalar_data = deque(maxlen=len(scalar_data)) 131 | for i in range(len(scalar_data)): 132 | train_scalar_data.append(scalar_data[i]) 133 | 134 | train_dic={} 135 | scaler_train = MinMaxScaler(feature_range=(-1,1)) 136 | 137 | for i in range(len(train_data_Ob)): 138 | scaler_train.fit(train_scalar_data) 139 | train_dic[train_data_Ob_OHLC.index[i]] = scaler_train.transform(train_data_Ob_OHLC.iloc[i].values.reshape(1, -1)).reshape(-1) 140 | train_scalar_data.append(train_data_Ob_OHLC.iloc[i].values.reshape(-1)) 141 | 142 | Ntrain_data_OHLC = pd.DataFrame(train_dic).transpose() 143 | Ntrain_data_OHLC.columns = train_data_Ob_OHLC.columns 144 | 145 | Ntrain_data = Ntrain_data_OHLC.join(Ntrain_data_Ob_spread) 146 | 147 | ##### val 148 | 149 | val_data_Ob_OHLC = val_data_Ob[['Open', 'High', 'Low', 'Close']] 150 | val_data_Ob_spread = val_data_Ob[['Futures spread', 'Jump spread']] 151 | 152 | Nval_data_Ob_spread = spread_scaler_train.transform(val_data_Ob_spread) 153 | Nval_data_Ob_spread = pd.DataFrame(Nval_data_Ob_spread) 154 | Nval_data_Ob_spread.index = val_data_Ob_spread.index 155 | Nval_data_Ob_spread.columns = val_data_Ob_spread.columns 156 | 157 | scalar_data = train_data_Ob.iloc[Size:].values[:,:4] 158 | 159 | val_scalar_data = deque(maxlen=len(scalar_data)) 160 | for i in range(len(scalar_data)): 161 | val_scalar_data.append(scalar_data[i]) 162 | 163 | val_dic={} 164 | scaler_val = MinMaxScaler(feature_range=(-1,1)) 165 | 
166 | for i in range(len(val_data_Ob)): 167 | scaler_val.fit(val_scalar_data) 168 | val_dic[val_data_Ob_OHLC.index[i]] = scaler_val.transform(val_data_Ob_OHLC.iloc[i].values.reshape(1, -1)).reshape(-1) 169 | val_scalar_data.append(val_data_Ob_OHLC.iloc[i].values.reshape(-1)) 170 | 171 | Nval_data_OHLC = pd.DataFrame(val_dic).transpose() 172 | Nval_data_OHLC.columns = val_data_Ob_OHLC.columns 173 | 174 | Nval_data = Nval_data_OHLC.join(Nval_data_Ob_spread) 175 | 176 | ##### test 177 | 178 | test_data_Ob_OHLC = test_data_Ob[['Open', 'High', 'Low', 'Close']] 179 | test_data_Ob_spread = test_data_Ob[['Futures spread', 'Jump spread']] 180 | 181 | spread_scaler_test = MinMaxScaler(feature_range=(-1,1)) 182 | spread_scaler_test.fit(train_data_Ob_spread.append(test_data_Ob_spread)) 183 | 184 | Ntest_data_Ob_spread = spread_scaler_test.transform(test_data_Ob_spread) 185 | Ntest_data_Ob_spread = pd.DataFrame(Ntest_data_Ob_spread) 186 | Ntest_data_Ob_spread.index = test_data_Ob_spread.index 187 | Ntest_data_Ob_spread.columns = test_data_Ob_spread.columns 188 | 189 | scalar_data = val_data_Ob.iloc[Size:].values [:,:4] 190 | 191 | test_scalar_data = deque(maxlen=len(scalar_data)) 192 | for i in range(len(scalar_data)): 193 | test_scalar_data.append(scalar_data[i]) 194 | 195 | test_dic={} 196 | scaler_test = MinMaxScaler(feature_range=(-1,1)) 197 | 198 | for i in range(len(test_data_Ob)): 199 | scaler_test.fit(test_scalar_data) 200 | test_dic[test_data_Ob_OHLC.index[i]] = scaler_test.transform(test_data_Ob_OHLC.iloc[i].values.reshape(1, -1)).reshape(-1) 201 | test_scalar_data.append(test_data_Ob_OHLC.iloc[i].values.reshape(-1)) 202 | 203 | Ntest_data_OHLC = pd.DataFrame(test_dic).transpose() 204 | Ntest_data_OHLC.columns = test_data_Ob_OHLC.columns 205 | 206 | Ntest_data = Ntest_data_OHLC.join(Ntest_data_Ob_spread) 207 | 208 | return train_data, Ntrain_data, val_data, Nval_data, test_data, Ntest_data -------------------------------------------------------------------------------- /DL_moudle/RL_moudle/CWRNN_DDQN/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Feb 22 12:03:24 2019 4 | 5 | @author: fgw 6 | """ 7 | 8 | -------------------------------------------------------------------------------- /DL_moudle/RL_moudle/CWRNN_DDQN/__pycache__/Data_Process.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinRiver/Deep-Reinforcement-Learning-on-Futures-Trading/8da8e08385058828bb4b2a08782492619f7f77c2/DL_moudle/RL_moudle/CWRNN_DDQN/__pycache__/Data_Process.cpython-35.pyc -------------------------------------------------------------------------------- /DL_moudle/RL_moudle/CWRNN_DDQN/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinRiver/Deep-Reinforcement-Learning-on-Futures-Trading/8da8e08385058828bb4b2a08782492619f7f77c2/DL_moudle/RL_moudle/CWRNN_DDQN/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /DL_moudle/RL_moudle/CWRNN_DDQN/__pycache__/model.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinRiver/Deep-Reinforcement-Learning-on-Futures-Trading/8da8e08385058828bb4b2a08782492619f7f77c2/DL_moudle/RL_moudle/CWRNN_DDQN/__pycache__/model.cpython-35.pyc 
--------------------------------------------------------------------------------
/DL_moudle/RL_moudle/CWRNN_DDQN/__pycache__/trade_env.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinRiver/Deep-Reinforcement-Learning-on-Futures-Trading/8da8e08385058828bb4b2a08782492619f7f77c2/DL_moudle/RL_moudle/CWRNN_DDQN/__pycache__/trade_env.cpython-35.pyc
--------------------------------------------------------------------------------
/DL_moudle/RL_moudle/CWRNN_DDQN/model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Feb 22 12:07:09 2019
4 | 
5 | @author: fgw
6 | """
7 | 
8 | import os
9 | import tensorflow as tf
10 | import numpy as np
11 | 
12 | from collections import deque
13 | 
14 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
15 | 
16 | np.random.seed(1)
17 | tf.set_random_seed(2)
18 | 
19 | class ClockworkRNN(object):
20 |     def __init__(self, input_state, in_length, in_width, hidden_neurons, Rb, clockwork_periods, trainable=True):
21 | 
22 |         self.in_length = in_length #time_steps
23 |         self.in_width = in_width #feature_dim
24 | 
25 |         self.inputs = input_state
26 |         self.hidden_neurons = hidden_neurons
27 |         if hidden_neurons / Rb >= 2:
28 |             self.Rb = Rb
29 |         else:
30 |             raise ValueError("Rb must be no more than half of hidden_neurons")
31 |         self.clockwork_periods = clockwork_periods # the clockwork periods let the g-modules update at different rates, e.g. 1, 2, 4, 8, 16, 32
32 |         self.Ti_sum = len(self.clockwork_periods)
33 | 
34 |         self.trainable = trainable
35 | 
36 |     # builds the mask that keeps the recurrent weight matrix block upper-triangular across the g-modules
37 |     def _Mask_Matrix(self,W,k):
38 |         length = int(W / k)
39 |         tmp = np.ones([W,W])
40 |         for i in range(length)[1:]:
41 |             tmp[i*k:(i+1)*k,:i*k]=0
42 |             tmp[(i+1)*k:,:i*k]=0
43 |         return np.transpose(tmp)
44 | 
45 |     def inference(self):
46 |         #RNN initial weights
47 |         with tf.variable_scope('input_layers1'):
48 |             self.WI1 = tf.get_variable('WI1', shape=[self.in_width, self.hidden_neurons], initializer=tf.truncated_normal_initializer(stddev=0.1), trainable=self.trainable)
49 |             self.bI1 = tf.get_variable('bI1', shape=[self.hidden_neurons], initializer=tf.truncated_normal_initializer(stddev=0.1), trainable=self.trainable)
50 | 
51 |         with tf.variable_scope('input_layers2'):
52 |             self.WI2 = tf.get_variable('WI2', shape=[self.hidden_neurons, self.hidden_neurons], initializer=tf.truncated_normal_initializer(stddev=0.1), trainable=self.trainable)
53 |             self.bI2 = tf.get_variable('bI2', shape=[self.hidden_neurons], initializer=tf.truncated_normal_initializer(stddev=0.1), trainable=self.trainable)
54 | 
55 |         traingular_mask = self._Mask_Matrix(self.hidden_neurons, self.Rb)
56 |         self.traingular_mask = tf.constant(traingular_mask, dtype=tf.float32, name='mask_upper_traingular')
57 | 
58 |         with tf.variable_scope('hidden_layers_1'):
59 |             self.WH1 = tf.get_variable('WH1', shape=[self.hidden_neurons, self.hidden_neurons], initializer=tf.truncated_normal_initializer(stddev=0.1), trainable=self.trainable)
60 |             self.WH1 = tf.multiply(self.WH1, self.traingular_mask)
61 |             self.bH1 = tf.get_variable('bH1', shape=[self.hidden_neurons], initializer=tf.truncated_normal_initializer(stddev=0.1), trainable=self.trainable)
62 | 
63 |         with tf.variable_scope('hidden_layers_2'):
64 |             self.WH2 = tf.get_variable('WH2', shape=[self.hidden_neurons, self.hidden_neurons], initializer=tf.truncated_normal_initializer(stddev=0.1), trainable=self.trainable)
65 |             self.WH2 = tf.multiply(self.WH2, self.traingular_mask)
66 |             self.bH2 = tf.get_variable('bH2', shape=[self.hidden_neurons], initializer=tf.truncated_normal_initializer(stddev=0.1), trainable=self.trainable)
67 | 
68 |         # split the input tensor into one slice per time step
69 |         X_list = [tf.squeeze(x, axis=[1]) for x
70 |                   in tf.split(value=self.inputs, axis=1, num_or_size_splits=self.in_length, name='inputs_list')]
71 | 
72 |         with tf.variable_scope('clockwork_rnn') as scope:
73 |             # initialize the hidden states with zeros
74 |             self.state1 = tf.get_variable('hidden_sate1', shape=[1, self.hidden_neurons],initializer=tf.zeros_initializer(), trainable=False)
75 |             self.state2 = tf.get_variable('hidden_sate2', shape=[1, self.hidden_neurons],initializer=tf.zeros_initializer(), trainable=False)
76 | 
77 |             for i in range(self.in_length):
78 |                 # count how many g-modules are active at this time step
79 |                 if i>0:
80 |                     scope.reuse_variables()
81 |                 g_counter = 0
82 |                 for j in range(self.Ti_sum):
83 |                     if i%self.clockwork_periods[j]==0:
84 |                         g_counter += 1
85 |                 if g_counter == self.Ti_sum:
86 |                     g_counter = self.hidden_neurons
87 |                 else:
88 |                     g_counter *= self.Rb
89 | 
90 |                 # first hidden layer update (CW-RNN eq. 1): input plus recurrent contribution
91 |                 tmp_right1 = tf.matmul(X_list[i], tf.slice(self.WI1, [0,0], [-1,g_counter]))
92 |                 tmp_right1 = tf.nn.bias_add(tmp_right1, tf.slice(self.bI1,[0],[g_counter]))
93 |                 self.WH1 = tf.multiply(self.WH1, self.traingular_mask)
94 |                 tmp_left1 = tf.matmul(self.state1, tf.slice(self.WH1, [0,0], [-1,g_counter]))
95 |                 tmp_left1 = tf.nn.bias_add(tmp_left1, tf.slice(self.bH1,[0],[g_counter]))
96 |                 tmp_hidden1 = tf.tanh(tf.add(tmp_left1, tmp_right1))
97 | 
98 |                 # update the hidden states of the active modules only
99 |                 self.state1 = tf.concat(axis=1, values=[tmp_hidden1, tf.slice(self.state1, [0, g_counter], [-1,-1])])
100 | 
101 |                 tmp_right2 = tf.matmul(self.state1, tf.slice(self.WI2, [0,0], [-1,g_counter]))
102 |                 tmp_right2 = tf.nn.bias_add(tmp_right2, tf.slice(self.bI2,[0],[g_counter]))
103 |                 self.WH2 = tf.multiply(self.WH2, self.traingular_mask)
104 |                 tmp_left2 = tf.matmul(self.state2, tf.slice(self.WH2, [0,0], [-1,g_counter]))
105 |                 tmp_left2 = tf.nn.bias_add(tmp_left2, tf.slice(self.bH2,[0],[g_counter]))
106 |                 tmp_hidden2 = tf.tanh(tf.add(tmp_left2, tmp_right2))
107 | 
108 |                 self.state2 = tf.concat(axis=1, values=[tmp_hidden2, tf.slice(self.state2, [0, g_counter], [-1,-1])])
109 | 
110 |         self.final_state = self.state2
111 | 
112 |         return self.final_state
113 | 
114 | 
115 | class DQNCore(object):
116 |     def __init__(self, observation, num_actions, time_step, start_l_rate, decay_step, decay_rate, cwrnn_hidden_neurons,
117 |                  cwrnn_Rb, cwrnn_clockwork_periods, gamma, dropout, temp, save_path, test_lr, training=True, loss='mse'):
118 | 
119 |         self.num_actions = num_actions
120 |         self.gamma = gamma # discount factor for expected returns
121 |         self.start_l_rate = start_l_rate
122 |         self.decay_step = decay_step
123 |         self.decay_rate = decay_rate
124 |         self.global_step = tf.Variable(0, trainable=False)
125 |         if training:
126 |             self.learning_rate = tf.train.exponential_decay(self.start_l_rate, self.global_step, self.decay_step, self.decay_rate, staircase=True)
127 |         else:
128 |             self.learning_rate = test_lr
129 |         self.dropout = dropout
130 |         self.temp = temp
131 |         self.time_step = time_step
132 |         self.feature_dim = observation.shape[1]
133 | 
134 |         ## CWRNN-related parameters
135 |         self.cwrnn_hidden_neurons = cwrnn_hidden_neurons
136 |         self.cwrnn_Rb = cwrnn_Rb
137 |         if max(cwrnn_clockwork_periods)<=time_step:
138 |             self.cwrnn_clockwork_periods = cwrnn_clockwork_periods
139 |         else:
140 |             raise ValueError("Max clockwork period must not exceed time step")
141 | 
142 |         self.save_path1 = save_path+'/training'
143 |         self.save_path2 = save_path+'/trained'
144 | 
145 |         # placeholders for the sampled experience
146 |         self.inputs = tf.placeholder(tf.float32, [1, self.time_step, self.feature_dim])
147 |         self.targets = tf.placeholder(tf.float32, name='targets') # y
148 |         self.actions = tf.placeholder(tf.int32, name='actions')
149 |         self.rewards = tf.placeholder(tf.float32, name='rewards')
150 |         self.Q = self._build_CWQNetwork('Qeval', trainable=True) # state Q , main network
151 |         self.next_Q = self._build_CWQNetwork('next_eval',trainable=False) # next state Q , target network
152 | 
153 |         # select the Q-value of the chosen action with a one-hot mask
154 |         one_hot = tf.one_hot(self.actions, self.num_actions, 1., 0.) ## tf.one_hot(indices, depth, on_value, off_value); self.actions takes the values 0, 1, 2
155 |         Qmax = tf.reduce_sum(self.Q * one_hot, axis=1)
156 | 
157 |         if loss == 'mse':
158 |             self._loss = tf.reduce_mean(tf.squared_difference(Qmax, self.targets))
159 |         elif loss == 'mse_log':
160 |             epsilon = 1.0e-9
161 |             Qmax = tf.keras.backend.clip(Qmax, epsilon, 1.0 - epsilon)
162 |             self._loss = tf.keras.backend.mean(tf.keras.backend.square(tf.keras.backend.log(self.targets) - tf.keras.backend.log(Qmax)), axis=-1)
163 |         elif loss == 'mse_sd':
164 |             epsilon = 1.0e-9
165 |             Qmax = tf.keras.backend.clip(Qmax, epsilon, 1.0 - epsilon)
166 |             self._loss = tf.keras.backend.mean(tf.keras.backend.square(self.targets - tf.keras.backend.sqrt(Qmax)), axis=-1)
167 |         else:
168 |             raise ValueError("loss should be one of ['mse', 'mse_log', 'mse_sd']")
169 | 
170 |         self.params = tf.trainable_variables()
171 |         optimizer = tf.train.AdamOptimizer(self.learning_rate)
172 |         grad_var = optimizer.compute_gradients(loss = self._loss, var_list = self.params, aggregation_method = 2)
173 |         self._train_op = optimizer.apply_gradients(grad_var, global_step = self.global_step)
174 | 
175 |         #session
176 |         self.sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.6)))
177 |         self.sess.run(tf.global_variables_initializer())
178 | 
179 |     def init(self):
180 |         self.state_step = deque(maxlen=self.time_step)
181 |         self.next_state_step = deque(maxlen=self.time_step)
182 | 
183 |     def update_state_step(self, state):
184 |         self.state_step.append(state)
185 | 
186 |     def update_next_state_step(self, next_state):
187 |         self.next_state_step.append(next_state)
188 | 
189 |     def _build_CWQNetwork(self, name, trainable):
190 |         w_init, b_init = tf.random_normal_initializer(0.0, 0.3), tf.constant_initializer(0.1)
191 |         with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
192 |             cw_rnn = ClockworkRNN(input_state=self.inputs, in_length=self.time_step, in_width=self.feature_dim,
193 |                                   hidden_neurons=self.cwrnn_hidden_neurons, Rb=self.cwrnn_Rb,
194 |                                   clockwork_periods=self.cwrnn_clockwork_periods, trainable=trainable)
195 | 
196 |             final_state = cw_rnn.inference()
197 |             q_network = tf.layers.dense(final_state, self.num_actions, None, kernel_initializer=w_init, bias_initializer=b_init, trainable=trainable, name='output_layer')
198 |         return q_network
199 | 
200 |     def update_nextQ_network(self):
201 |         next_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='next_eval')
202 |         Q_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Qeval')
203 |         # zip stops at the shorter of the two variable lists
204 |         self.sess.run([tf.assign(n,q) for n,q in zip(next_params, Q_params)])
205 | 
206 |     def Incremental_Methods(self, action, reward, done):
207 |         state = np.asarray([self.state_step[i] for i in range(len(self.state_step))])
208 |         next_state = np.asarray([self.next_state_step[i] for i in range(len(self.next_state_step))])
209 | 
210 |         ## Double DQN: select the next action with the main network, evaluate it with the target network
211 |         next_Q_main = self.sess.run(self.Q, feed_dict={self.inputs:next_state.reshape(1, self.time_step, self.feature_dim)})
212 |         max_action = np.argmax(next_Q_main[0])
213 |         next_Q_target = self.sess.run(self.next_Q, feed_dict={self.inputs:next_state.reshape(1, self.time_step, self.feature_dim)})
214 |         next_Q = next_Q_target[0][max_action]
215 | 
216 |         # done mask: True -> 1, False -> 0
217 |         mask = np.array(done).astype('float')
218 |         target = mask * reward + (1 - mask) * (reward + self.gamma * next_Q)
219 | 
220 |         # one gradient descent step
221 |         _ , loss = self.sess.run([self._train_op, self._loss], feed_dict={self.inputs:state.reshape(1, self.time_step, self.feature_dim), self.actions:action, self.targets:target}) # update the network weights
222 |         return loss
223 | 
224 |     def boltzmann_policy_np(self):
225 |         if len(self.state_step) >= self.time_step:
226 |             state = np.asarray([self.state_step[i][0] for i in range(len(self.state_step))])
227 |             Q = self.sess.run(self.Q, feed_dict={self.inputs:state.reshape(1, self.time_step, self.feature_dim)})
228 |             Q_probs = self._softmax(Q[0]/self.temp)
229 |             action_value = np.random.choice(Q_probs, p=Q_probs)
230 |             action = np.argmax(Q_probs==action_value)
231 |         else:
232 |             action = 2
233 |             Q_probs = np.array([0,0,0])
234 |         return action, Q_probs
235 | 
236 |     def greedy_policy(self):
237 |         if len(self.state_step) >= self.time_step:
238 |             state = np.asarray([self.state_step[i][0] for i in range(len(self.state_step))])
239 |             action_value = self.sess.run(self.Q, feed_dict={self.inputs:state.reshape(1, self.time_step, self.feature_dim)})
240 |             action = np.argmax(action_value, axis=1)[0]
241 |         else:
242 |             action = 2
243 |             action_value=np.array([0,0,0])
244 |         return action, action_value
245 | 
246 |     def save_training_model(self):
247 |         if not os.path.isdir(self.save_path1):
248 |             os.makedirs(self.save_path1)
249 | 
250 |         self.saver = tf.train.Saver()
251 |         self.saver.save(self.sess, self.save_path1+'/save_model.ckpt')
252 | 
253 |         #print('training model save successfully!')
254 | 
255 |     def save_trained_model(self):
256 |         if not os.path.isdir(self.save_path2):
257 |             os.makedirs(self.save_path2)
258 | 
259 |         self.saver = tf.train.Saver()
260 |         self.saver.save(self.sess, self.save_path2+'/save_model.ckpt')
261 | 
262 |         print('trained model saved successfully!')
263 | 
264 |     def load_training_model(self):
265 |         self.saver = tf.train.Saver()
266 |         self.saver.restore(self.sess, self.save_path1+'/save_model.ckpt')
267 | 
268 |         #print('training model load successfully!')
269 | 
270 |     def load_trained_model(self):
271 |         self.saver = tf.train.Saver()
272 |         self.saver.restore(self.sess, self.save_path2+'/save_model.ckpt')
273 | 
274 |         #print('trained model load successfully!')
275 | 
276 |     def close_session(self):
277 |         self.sess.close()
278 | 
279 |         print('Close session!')
280 | 
281 |     def _softmax(self, x):
282 |         # numerically stable softmax used by boltzmann_policy_np
283 |         e_x = np.exp(x - np.max(x))
284 |         return e_x / np.sum(e_x)
--------------------------------------------------------------------------------
/DL_moudle/RL_moudle/CWRNN_DDQN/trade_env.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Feb 22 12:07:26 2019
4 | 
5 | @author: fgw
6 | """
7 | 
8 | import numpy as np
9 | 
10 | class environment(object):
11 | 
12 |     def __init__(self, trade_data, feature_data, split_price=300, commodity_spot=200, initial_cash=1000000,
reward_anchored=False, reward_scaler=100): 13 | self.data_close = trade_data['Close'] 14 | self.data_open = trade_data['Open'] 15 | self.data_observation = np.asarray(feature_data) 16 | self.action_space = ['long', 'short', 'close'] 17 | self.split_price = split_price 18 | self.commodity_spot = commodity_spot 19 | self.initial_cash = initial_cash 20 | self.reward_anchored = reward_anchored 21 | self.reset() 22 | 23 | if self.reward_anchored == False: 24 | self.reward_scaler = reward_scaler 25 | else: 26 | pass 27 | 28 | def reset(self): 29 | self.step_counter = 0 30 | self.cash = self.initial_cash 31 | self.total_value = self.cash 32 | self.flags = 0 33 | self.hold_period = 0 34 | self.last_flag = 0 35 | self.stop_loss = False 36 | if self.reward_anchored == True: 37 | self.reward_record = np.array([]) 38 | 39 | def get_initial_state(self): 40 | observation=np.hstack((self.data_observation[0,:], self.flags)).reshape(-1, self.data_observation.shape[1]+1) 41 | return observation 42 | 43 | def get_action_space(self): 44 | return self.action_space 45 | 46 | def long(self): 47 | self.flags = 1 48 | self.cash -= self.split_price 49 | self.cost=self.data_open[self.step_counter] 50 | 51 | def short(self): 52 | self.flags = -1 53 | self.cash -= self.split_price 54 | self.cost=self.data_open[self.step_counter] 55 | 56 | def keep(self): 57 | pass 58 | 59 | def close_long(self): 60 | self.flags = 0 61 | self.profit=(self.data_open[self.step_counter]-self.cost) * self.commodity_spot 62 | self.cash += (-self.split_price+self.profit) 63 | 64 | def close_short(self): 65 | self.flags = 0 66 | self.profit=(self.cost-self.data_open[self.step_counter]) * self.commodity_spot 67 | self.cash += (-self.split_price+self.profit) 68 | 69 | def step_op(self, action): 70 | if action == 'long': 71 | if self.flags == 0: 72 | self.long() 73 | elif self.flags == -1: 74 | self.close_short() 75 | self.long() 76 | else: 77 | self.keep() 78 | 79 | elif action == 'close': 80 | if self.flags == 1: 81 | self.close_long() 82 | elif self.flags == -1: 83 | self.close_short() 84 | else: 85 | pass 86 | 87 | elif action == 'short': 88 | if self.flags == 0: 89 | self.short() 90 | elif self.flags == 1: 91 | self.close_long() 92 | self.short() 93 | else: 94 | self.keep() 95 | 96 | else: 97 | raise ValueError("action should be elements of ['long', 'short', 'close']") 98 | 99 | if self.flags==1: 100 | openposition=(self.data_close[self.step_counter]-self.cost) * self.commodity_spot 101 | elif self.flags==-1: 102 | openposition=(self.cost-self.data_close[self.step_counter]) * self.commodity_spot 103 | else: 104 | openposition=0 105 | 106 | reward = self.cash + openposition - self.total_value 107 | self.step_counter += 1 108 | self.total_value = openposition + self.cash 109 | 110 | if self.flags == self.last_flag and self.last_flag != 0: 111 | self.hold_period += 1 112 | else: 113 | self.hold_period = 0 114 | 115 | self.stop_loss= False 116 | 117 | if self.hold_period == 3: 118 | if openposition < 0: 119 | self.stop_loss = True 120 | else : 121 | pass 122 | else: 123 | pass 124 | 125 | done = False 126 | 127 | if self.step_counter >= (len(self.data_close)-1): 128 | done = True 129 | 130 | try: 131 | next_observation = np.hstack((self.data_observation[self.step_counter,:], self.flags)).reshape(-1, self.data_observation.shape[1]+1) 132 | except: 133 | next_observation = None 134 | done = True 135 | #print('last trade for test data') 136 | 137 | self.last_flag=self.flags 138 | 139 | if self.reward_anchored == True: 140 | self.reward_record = 
np.hstack((self.reward_record, reward)) 141 | return (reward-np.mean(self.reward_record))/(max(self.reward_record)-min(self.reward_record)+1e-7), next_observation, done 142 | else: 143 | return reward/(self.commodity_spot*self.reward_scaler), next_observation, done 144 | 145 | def step(self, action): 146 | if action == 0: 147 | return self.step_op('long') 148 | elif action == 1: 149 | return self.step_op('short') 150 | elif action == 2: 151 | return self.step_op('close') 152 | else: 153 | raise ValueError("action should be one of [0,1,2]") -------------------------------------------------------------------------------- /DL_moudle/RL_moudle/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Feb 22 12:03:24 2019 4 | 5 | @author: fgw 6 | """ 7 | 8 | -------------------------------------------------------------------------------- /DL_moudle/RL_moudle/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinRiver/Deep-Reinforcement-Learning-on-Futures-Trading/8da8e08385058828bb4b2a08782492619f7f77c2/DL_moudle/RL_moudle/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /DL_moudle/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Feb 22 12:03:24 2019 4 | 5 | @author: fgw 6 | """ 7 | 8 | -------------------------------------------------------------------------------- /DL_moudle/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinRiver/Deep-Reinforcement-Learning-on-Futures-Trading/8da8e08385058828bb4b2a08782492619f7f77c2/DL_moudle/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Lin Yi-Jie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep-Reinforcement-Learning-on-Futures-Trading
2 | ## Introduction
3 | We develop a futures-trading robot based on reinforcement learning: Q-learning with a Clockwork RNN.
4 | It has four main features:
5 | 1. The Clockwork RNN is modified to use two hidden layers.
6 | 2. We adopt Double Dueling DQN instead of plain DQN to make the trading performance more robust.
7 | 3. To balance exploration and exploitation and keep the model stable during training, the Boltzmann policy works better than the greedy policy.
8 | 4. Because time-series data are temporally dependent, we replace experience replay with an incremental data-update method.
9 | 
10 | ## Reference
11 | 1. A Clockwork RNN, https://arxiv.org/abs/1402.3511
12 | 2. Dueling Network Architectures for Deep Reinforcement Learning, http://proceedings.mlr.press/v48/wangf16.pdf
13 | 3. Exploration in Deep Reinforcement Learning, https://www.ias.informatik.tu-darmstadt.de/uploads/Theses/Abschlussarbeiten/markus_semmler_bsc.pdf
14 | 
--------------------------------------------------------------------------------
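The repository does not ship a training or evaluation script, so the following is only a minimal sketch of how `Data_Process`, `environment`, and `DQNCore` might be wired together. It assumes TensorFlow 1.x (matching the cpython-35 bytecode in the repo) and a CSV file containing the columns that `Data_Process` reads (`Date`, `Open`, `High`, `Low`, `Close`, `Futures spread`, `Jump spread`, `Open_futures`, `Close_futures`). Every file name, date range, and hyper-parameter value below is an illustrative assumption, not something defined by the repository; only the class and method signatures come from the modules above.

```python
# Hypothetical training loop; file names, dates and hyper-parameters are assumptions.
from DL_moudle.RL_moudle.CWRNN_DDQN.Data_Process import Data_Process
from DL_moudle.RL_moudle.CWRNN_DDQN.trade_env import environment
from DL_moudle.RL_moudle.CWRNN_DDQN.model import DQNCore

dp = Data_Process('futures_daily.csv')  # assumed CSV path
train_data, Ntrain_data, test_data, Ntest_data = dp.spilt_to_train_test('2016-01-01', '2018-12-31')

env = environment(train_data, Ntrain_data)  # defaults: split_price=300, commodity_spot=200
env.reset()
state = env.get_initial_state()             # shape (1, feature_dim); the last column is the position flag

agent = DQNCore(observation=state, num_actions=3, time_step=16,
                start_l_rate=1e-4, decay_step=1000, decay_rate=0.96,
                cwrnn_hidden_neurons=64, cwrnn_Rb=8,
                cwrnn_clockwork_periods=[1, 2, 4, 8, 16],
                gamma=0.9, dropout=0.5, temp=1.0,
                save_path='./checkpoints', test_lr=1e-4,
                training=True, loss='mse')
agent.init()

done = False
step = 0
while not done:
    agent.update_state_step(state)
    action, _ = agent.boltzmann_policy_np()     # returns action 2 ('close') until the window is full
    reward, next_state, done = env.step(action)
    if next_state is None:                      # the environment signals the last bar this way
        break
    agent.update_next_state_step(next_state)
    if len(agent.state_step) >= agent.time_step:   # train only once a full time_step window exists
        agent.Incremental_Methods(action, reward, done)
    step += 1
    if step % 20 == 0:                          # refresh the target network periodically (assumed interval)
        agent.update_nextQ_network()
    state = next_state

agent.save_trained_model()
```

The guard on the deque length mirrors the check inside `boltzmann_policy_np`: both policies fall back to the neutral 'close' action until `time_step` observations have been collected, and `Incremental_Methods` reshapes the state deques to `(1, time_step, feature_dim)`, so it can only be called once they are full.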