├── .gitignore ├── EURUSD1.mini.csv ├── README.md ├── agent ├── Agent.py └── __init__.py ├── app_worker.py ├── environment ├── EnvPlayer.py ├── Environment.py └── __init__.py ├── main.test.tf.py ├── play_history.py ├── preprocess ├── __init__.py ├── data_pre_process.py └── pattern_encoder.py ├── requirments.txt └── trading_agent.py
/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | config/ 3 | logs/ 4 | model_saved/ 5 | .idea 6 | *.iml --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # data_stream_gym 2 | 3 | Use this simple lib to implement RL methods over a data stream with just a few lines of code. 4 | 5 | ## Create environment 6 | 7 | ``` 8 | class FxEnv(TradeEnvironment): 9 | 10 | @classmethod 11 | def __reward__(self, state, action, next_state): 12 | return -1 if action > 0 else 1 13 | ``` 14 | For further development you need to implement your own reward function. 15 | 16 | Override 17 | def __reward__(self, state, action, next_state) 18 | 19 | 20 | 21 | 22 | ## Create Agent 23 | 24 | ``` 25 | 26 | class FxTradeAgent(Agent): 27 | 28 | def act(self, state): 29 | return np.argmax(np.random.randint(1, 3, self.action_size)) 30 | 31 | @classmethod 32 | def replay(self, memories): 33 | print(len(memories)) 34 | 35 | ``` 36 | 37 | ## Train the algorithm 38 | 39 | ``` 40 | # Create Environment with data values 41 | fx_env = FxEnv(df.values) 42 | pl = PlayGround(env=fx_env, agent=FxTradeAgent(), time_frame=4) 43 | pl.play() 44 | ``` 45 | 46 | ## Licence 47 | MIT 48 | 49 | --------------------------------------------------------------------------------
/agent/Agent.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | 3 | import random 4 | 5 | 6 | class Agent(object): 7 | 8 | def __init__(self, 9 | action_size=3, 10 | max_length=1000, replay_prob=0.995, forget_rate=0.25): 11 | self.action_size = action_size 12 | self.forget_rate = forget_rate 13 | self.replay_prob = replay_prob 14 | self.max_length = max_length 15 | self.__memory__ = deque(maxlen=self.max_length) 16 | self.after_init() 17 | 18 | @classmethod 19 | def after_init(self): 20 | pass 21 | 22 | @classmethod 23 | def act(self, state): 24 | return -1 25 | 26 | @classmethod 27 | def replay(self, memories): 28 | pass 29 | 30 | @classmethod 31 | def after_memories(self, train_status): 32 | pass 33 | 34 | def memorize(self, state_t, action_t, reward_t, state_t_next, done): 35 | self.__memory__.append((state_t, action_t, reward_t, state_t_next, done)) 36 | evaluate_request = False 37 | if len(self.__memory__) == self.__memory__.maxlen: 38 | if random.uniform(0, 1) <= self.replay_prob: 39 | self.replay(self.__memory__) 40 | 41 | # Randomly forget a fraction of the memories 42 | for f in range(int(len(self.__memory__) * (random.uniform(0, self.forget_rate)))): 43 | self.__memory__.pop() 44 | print("Remaining memory length {}".format(len(self.__memory__))) 45 | 46 | self.__memory__.pop() # forget the newest entry (deque.pop() removes from the right) 47 | evaluate_request = True 48 | self.after_memories(evaluate_request) 49 | 50 | return evaluate_request 51 | --------------------------------------------------------------------------------
/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceylon-ai-projects/data_stream_gym/eae10bdf157235b73de36c8f16e3d25e212ca22f/agent/__init__.py --------------------------------------------------------------------------------
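The `memorize`/`replay` cycle in `Agent` is easiest to see with a small, self-contained subclass. The sketch below is illustrative only — the `RandomAgent` name, the tiny 10-item buffer, and the dummy 4-element states are assumptions, not part of the library. It fills the memory deque until `max_length` is reached, at which point `replay` fires with probability `replay_prob` and a random fraction (up to `forget_rate`) of the buffer is dropped:

```
import numpy as np

from agent.Agent import Agent


class RandomAgent(Agent):

    def act(self, state):
        # Uniform random action index in [0, action_size)
        return np.random.randint(self.action_size)

    def replay(self, memories):
        # Invoked by memorize() once the deque is full
        print("replay over {} transitions".format(len(memories)))


agent = RandomAgent(action_size=3, max_length=10, replay_prob=1.0)
for step in range(30):
    state = np.random.rand(4)  # dummy state vector
    action = agent.act(state)
    reward = 1.0 if action == 2 else -1.0
    # memorize() returns True whenever a replay/forget pass just ran
    if agent.memorize(state, action, reward, state, False):
        print("buffer replayed and partially forgotten at step {}".format(step))
```

Note that `deque.pop()` removes the most recent transition, so the forgetting pass trims the newest memories rather than the oldest.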
/app_worker.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | import csv 3 | 4 | app = Celery('tasks', broker='redis://localhost:6379/') 5 | 6 | 7 | @app.task 8 | def write_data_hist(x, y): 9 | with open(r'reward_history.csv', 'a', newline='') as csvfile: 10 | fieldnames = ['x', 'y'] 11 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 12 | writer.writerow({'x': x, 'y': y}) --------------------------------------------------------------------------------
/environment/EnvPlayer.py: -------------------------------------------------------------------------------- 1 | from app_worker import write_data_hist 2 | 3 | 4 | class PlayGround(object): 5 | reward_history = [] 6 | 7 | def __init__(self, env, agent, time_frame=4, playground_step=0): 8 | self.playground_step = playground_step 9 | self.time_frame = time_frame 10 | self.env = env 11 | self.agent = agent 12 | 13 | def play(self): 14 | done = False 15 | state_t_pre = None 16 | action_t_pre = None 17 | steps = 0 18 | agent_act = False 19 | reward_records = 0 20 | while not done: 21 | state_t, done = self.env.get_next_state() 22 | 23 | step_ = self.playground_step + steps 24 | 25 | if steps % self.time_frame == 0: 26 | if agent_act: 27 | reward_t_pre = self.env.calculate_reward(state_t_pre, action_t_pre, state_t) 28 | 29 | # Record history 30 | 31 | # print(state_t_pre[:1], action_t_pre, state_t[:1], reward_t_pre) 32 | # print(step_) 33 | 34 | if step_ % 10 == 0: 35 | write_data_hist.delay(step_, reward_records / steps) 36 | reward_records += reward_t_pre 37 | self.agent.memorize(state_t_pre, action_t_pre, reward_t_pre, state_t, done) 38 | agent_act = False 39 | 40 | if agent_act is False: 41 | action_t = self.agent.act(state_t) 42 | # print(action_t) 43 | agent_act = True 44 | action_t_pre = action_t 45 | state_t_pre = state_t 46 | 47 | steps += 1 --------------------------------------------------------------------------------
/environment/Environment.py: -------------------------------------------------------------------------------- 1 | class TradeEnvironment(object): 2 | 3 | def __init__(self, data): 4 | self.__data_feed = iter(data) 5 | self.pre_state = None 6 | 7 | @classmethod 8 | def __reward__(self, state, action, next_state): 9 | return 0 10 | 11 | def calculate_reward(self, state, action, next_state): 12 | reward = self.__reward__(state, action, next_state) 13 | return reward 14 | 15 | def get_next_state(self): 16 | ''' 17 | Fetch the next state from the data feed. 18 | :return: 19 | state_next - the next state from the stream, 20 | or None when the stream is exhausted 21 | done - True if the data stream has ended 22 | ''' 23 | try: 24 | state_next = next(self.__data_feed) 25 | except StopIteration: 26 | state_next = None 27 | done = state_next is None 28 | self.pre_state = state_next 29 | return state_next, done --------------------------------------------------------------------------------
/environment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceylon-ai-projects/data_stream_gym/eae10bdf157235b73de36c8f16e3d25e212ca22f/environment/__init__.py --------------------------------------------------------------------------------
/main.test.tf.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import tensorflow as tf 3 | import numpy as np 4 | import pandas as pd 5
| 6 | from sklearn.preprocessing import LabelEncoder 7 | from sklearn.utils import shuffle 8 | from sklearn.model_selection import train_test_split 9 | 10 | base_path = "/home/dewmal/WorkingProjects/ceylon_models/trading/trader/" 11 | 12 | 13 | def read_dataset(): 14 | df = pd.read_csv("{}Sonar.csv".format(base_path)) 15 | print(len(df.columns)) 16 | # Variables 17 | x = df[df.columns[0:60]].values 18 | y = df[df.columns[60]] 19 | 20 | # Encode the dependent variable 21 | encoder = LabelEncoder() 22 | encoder.fit(y) 23 | 24 | y = encoder.transform(y) 25 | y = one_hot_encoder(y) 26 | 27 | print(x.shape) 28 | return (x, y) 29 | 30 | 31 | def one_hot_encoder(labels): 32 | n_labels = len(labels) 33 | n_unique_labels = len(np.unique(labels)) 34 | 35 | one_hot_encode = np.zeros((n_labels, n_unique_labels)) 36 | one_hot_encode[np.arange(n_labels), labels] = 1 37 | return one_hot_encode 38 | 39 | 40 | # Read the dataset 41 | 42 | X, Y = read_dataset() 43 | 44 | # Shuffle the dataset to mix up the rows 45 | X, Y = shuffle(X, Y, random_state=1) 46 | 47 | # Split data set to train and test 48 | train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.20, random_state=415) 49 | 50 | # Inspect The shape of train and testing 51 | print(train_x.shape) 52 | print(train_y.shape) 53 | print(test_x.shape) 54 | 55 | # Define the important params and variables to work with tensor 56 | learning_rate = 0.3 57 | training_epochs = 100 58 | cost_history = [] 59 | n_dim = X.shape[1] 60 | print("n dim", n_dim) 61 | n_class = 2 62 | model_path = "{}model_saved".format(base_path) 63 | 64 | # Define the number of hidden layers and number of neurons for each layer 65 | n_hidden_1 = 60 66 | n_hidden_2 = 60 67 | n_hidden_3 = 60 68 | n_hidden_4 = 60 69 | 70 | x = tf.placeholder(tf.float32, [None, n_dim]) 71 | W = tf.Variable(tf.zeros([n_dim, n_class])) 72 | b = tf.Variable(tf.zeros([n_class])) 73 | y_ = tf.placeholder(tf.float32, [None, n_class]) 74 | 75 | 76 | def multiplayer_perception(x, weights, biases): 77 | # Hidden layer with RELU Activation 78 | layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 79 | layer_1 = tf.nn.relu(layer_1) 80 | 81 | # Hidden layer with sigmoid activation 82 | layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) 83 | layer_2 = tf.nn.sigmoid(layer_2) 84 | 85 | # Hidden layer with sigmoid activation 86 | layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']) 87 | layer_3 = tf.nn.sigmoid(layer_3) 88 | 89 | # Hidden layer with RELU Activation 90 | layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4']) 91 | layer_4 = tf.nn.relu(layer_4) 92 | 93 | out_layer = tf.matmul(layer_4, weights['out']) + biases['out'] 94 | return out_layer 95 | 96 | 97 | weights = { 98 | 'h1': tf.Variable(tf.truncated_normal([n_dim, n_hidden_1])), 99 | 'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])), 100 | 'h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3])), 101 | 'h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_hidden_4])), 102 | 'out': tf.Variable(tf.truncated_normal([n_hidden_4, n_class])), 103 | } 104 | 105 | biases = { 106 | 'b1': tf.Variable(tf.truncated_normal([n_hidden_1])), 107 | 'b2': tf.Variable(tf.truncated_normal([n_hidden_2])), 108 | 'b3': tf.Variable(tf.truncated_normal([n_hidden_3])), 109 | 'b4': tf.Variable(tf.truncated_normal([n_hidden_4])), 110 | 'out': tf.Variable(tf.truncated_normal([n_class])), 111 | } 112 | 113 | # Initialize all the variables 114 | 115 | init = tf.global_variables_initializer() 116 | 117 | saver 
= tf.train.Saver() 118 | 119 | # Call your model defined 120 | y = multiplayer_perception(x, weights, biases) 121 | 122 | # Define cost function and optimizer 123 | cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)) 124 | training_steps = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function) 125 | 126 | sess = tf.Session() 127 | sess.run(init) 128 | 129 | # Calculate the cost and the accuracy for each epoch 130 | 131 | mse_history = [] 132 | accuracy_history = [] 133 | 134 | for epoch in range(training_epochs): 135 | sess.run(training_steps, feed_dict={x: train_x, y_: train_y}) 136 | cost = sess.run(cost_function, feed_dict={x: train_x, y_: train_y}) 137 | cost_history.append(cost) 138 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 139 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 140 | # 141 | pred_y = sess.run(y, feed_dict={x: test_x}) 142 | mse = tf.reduce_mean(tf.square(pred_y - test_y)) 143 | mse = sess.run(mse) 144 | mse_history.append(mse) 145 | 146 | accuracy = sess.run(accuracy, feed_dict={x: train_x, y_: train_y}) 147 | accuracy_history.append(accuracy) 148 | 149 | print("Epoch : ", epoch, ' - ', cost, ' cost:', "-MSE:", mse, "- Train Accuraccy:", accuracy) 150 | 151 | save_path = saver.save(sess, model_path) 152 | print("Model saved in file: %s" % save_path) 153 | 154 | # Plot mse and acc 155 | plt.plot(mse_history, label="MSE") 156 | plt.show() 157 | plt.plot(accuracy_history, label="Acc") 158 | plt.show() 159 | -------------------------------------------------------------------------------- /play_history.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import seaborn as sns 4 | 5 | sns.set(style="whitegrid") 6 | # plt.ion() ## Note this correction 7 | # fig = plt.figure() 8 | df = pd.read_csv("reward_history.csv") 9 | x = df.iloc[-100:, 1:].values 10 | sns.distplot(x) 11 | plt.show() 12 | 13 | plt.plot(df.iloc[:, 1].values) 14 | plt.show() -------------------------------------------------------------------------------- /preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceylon-ai-projects/data_stream_gym/eae10bdf157235b73de36c8f16e3d25e212ca22f/preprocess/__init__.py -------------------------------------------------------------------------------- /preprocess/data_pre_process.py: -------------------------------------------------------------------------------- 1 | from preprocess.pattern_encoder import encode_column_to_range_index, decode_column_to_int 2 | 3 | from ta import * 4 | 5 | 6 | def process_change_series(close_s, step_back_s): 7 | series = (step_back_s - close_s) * 100 / close_s 8 | # print(series[-1:]) 9 | return series.apply(encode_column_to_range_index) 10 | 11 | 12 | def create_data_frame(data_csv, considering_steps=15, 13 | rsi_range=[14, 29, 58, 100], 14 | tsi_range=[14, 29, 58, 100], 15 | emi_range=[9, 11, 20, 100], 16 | aroon_range=[9, 13, 29, 50], 17 | dpo_range=[4, 5, 13, 35]): 18 | data_csv['Ct'] = data_csv.Close.shift(considering_steps) 19 | data_csv.dropna(inplace=True) 20 | # print(data_csv.head()) 21 | df = pd.DataFrame() 22 | close_s = data_csv.Close 23 | df['C'] = close_s 24 | 25 | for rsi_i in rsi_range: 26 | df['RSI({})'.format(rsi_i)] = rsi(close_s) / 100 27 | 28 | for atr_i in tsi_range: 29 | df['ATR({})'.format(atr_i)] = average_true_range(data_csv.High, 
data_csv.Low, close_s, n=atr_i) 30 | 31 | for ema_i in emi_range: 32 | df['exp({})'.format(ema_i)] = ema(close_s, ema_i) 33 | 34 | for aron_i in aroon_range: 35 | df['arn_d({})'.format(aron_i)] = aroon_down(close_s, n=aron_i) 36 | df['arn_u({})'.format(aron_i)] = aroon_up(close_s, n=aron_i) 37 | # 38 | for dpo_i in dpo_range: 39 | df['dpo({})'.format(dpo_i)] = ema(data_csv.Close, dpo_i) 40 | 41 | # Pattern 42 | series = (close_s.shift(1) - close_s) * 100 / close_s 43 | series = series.apply(encode_column_to_range_index) 44 | df['P1'] = series 45 | df['P2'] = series 46 | df['P3'] = series 47 | df['P4'] = series 48 | # 49 | for back_step in range(2, (considering_steps - 1) + 1): 50 | df['P1'] += process_change_series(close_s, close_s.shift(back_step)) 51 | 52 | # 53 | for back_step in range(2, 5): 54 | df['P2'] += process_change_series(close_s, close_s.shift(back_step)) 55 | 56 | # 57 | for back_step in range(2, 4): 58 | df['P3'] += process_change_series(close_s, close_s.shift(back_step)) 59 | 60 | for back_step in range(2, 10): 61 | df['P4'] += process_change_series(close_s, close_s.shift(back_step)) 62 | # print(df['P']) 63 | df.dropna(inplace=True) 64 | # print(df.values[-10:, -4:]) 65 | df['P1'] = df.P1.apply(decode_column_to_int) 66 | df['P2'] = df.P2.apply(decode_column_to_int) 67 | df['P3'] = df.P3.apply(decode_column_to_int) 68 | df['P4'] = df.P4.apply(decode_column_to_int) 69 | 70 | return df 71 | --------------------------------------------------------------------------------
/preprocess/pattern_encoder.py: -------------------------------------------------------------------------------- 1 | from baseconv import BaseConverter 2 | 3 | x = 0.01 4 | encoder_base = BaseConverter('USD') 5 | 6 | filter_ranges = [ 7 | {'range': (-100, -3 * x), 'label': 'D'}, 8 | {'range': (-3 * x, -2 * x), 'label': 'D'}, 9 | {'range': (-2 * x, -x), 'label': 'D'}, 10 | {'range': (-x, x), 'label': 'S'}, 11 | {'range': (x, 2 * x), 'label': 'U'}, 12 | {'range': (2 * x, 3 * x), 'label': 'U'}, 13 | {'range': (3 * x, 100), 'label': 'U'}, 14 | ] 15 | 16 | 17 | def pattern_to_action(pattern): 18 | pt_i = pattern[:1][0] 19 | pt_t = pattern[-1:][0] 20 | pt = pt_i + pt_t 21 | # print(pt) 22 | if pt == 'UU' or pt == 'SU': 23 | return 2 24 | elif pt == 'DD' or pt == 'SD': 25 | return 0 26 | else: 27 | return 1 28 | 29 | 30 | def encode_column_to_range_index(x, i=0, alpha=0.001): 31 | # print(x,i) 32 | for f in filter_ranges: 33 | if f['range'][0] <= x + (alpha * i) <= f['range'][1]: 34 | return f['label'] 35 | # print("None",x) 36 | return "U" 37 | 38 | 39 | def decode_column_to_int(x): 40 | return int(encoder_base.decode(x)) / float(encoder_base.decode("".rjust(len(x), 'D'))) 41 | 42 | 43 | 44 | if __name__ == '__main__': 45 | encode_column_to_range_index(-0.02) 46 | decode_column_to_int("UUUDSDSSS") 47 | encoder_base.decode("DDDDDDDDD") --------------------------------------------------------------------------------
/requirments.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | matplotlib 3 | tensorflow-gpu 4 | numpy 5 | torchvision 6 | gym 7 | torch 8 | ta==0.2.0 9 | celery 10 | redis 11 | scikit-learn 12 | seaborn 13 | python-baseconv --------------------------------------------------------------------------------
/trading_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from agent.Agent import Agent 4 | from data.data_manager import get_data_chunk 5 | from environment.EnvPlayer import PlayGround 6 | from environment.Environment import TradeEnvironment 7 | from preprocess.data_pre_process
import create_data_frame 8 | 9 | import tensorflow as tf 10 | import os 11 | 12 | dirname = os.path.dirname(__file__) 13 | base_path = dirname + "/model_saved" 14 | model_path = "{}/model".format(base_path) 15 | 16 | base_path = dirname + "/logs" 17 | log_path = "{}".format(base_path) 18 | 19 | print("Model path => {}".format(model_path)) 20 | print("Log path => {}".format(log_path)) 21 | 22 | obs_length = 21 23 | action_size = 3 24 | 25 | # Observation input (1 x obs_length) 26 | observations = tf.placeholder(shape=[1, obs_length], dtype=tf.float32) 27 | # 0, 1, 2 = SELL, STAY, BUY 28 | actions = tf.placeholder(shape=[None], dtype=tf.int32) 29 | # +1 / -1 reward for each remembered action 30 | rewards = tf.placeholder(shape=[None], dtype=tf.float32) 31 | 32 | # model 33 | Y = tf.layers.dense(observations, 200, activation=tf.nn.relu) 34 | Y = tf.layers.dense(Y, 100, activation=tf.nn.relu) 35 | Y = tf.layers.dense(Y, 50, activation=tf.nn.relu) 36 | Ylogits = tf.layers.dense(Y, action_size) 37 | 38 | # Sample an action from predicted probabilities 39 | sample_op = tf.multinomial(logits=tf.reshape(Ylogits, shape=(1, 3)), num_samples=1) 40 | 41 | # loss 42 | cross_entropies = tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(actions, action_size), 43 | logits=Ylogits) 44 | 45 | loss = tf.reduce_sum(rewards * cross_entropies) 46 | 47 | # Training Operation 48 | optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001, decay=0.99) 49 | train_op = optimizer.minimize(loss) 50 | 51 | init_op = tf.global_variables_initializer() 52 | 53 | saver = tf.train.Saver() 54 | 55 | 56 | def dif_to_action(diff): 57 | if diff < 0: 58 | return 0 # Sell 59 | elif diff == 0: 60 | return 1 # Stay 61 | else: 62 | return 2 # Buy 63 | 64 | 65 | class FxEnv(TradeEnvironment): 66 | 67 | @classmethod 68 | def __reward__(self, state, action, state_t): 69 | if state_t is not None and state is not None: 70 | diff = state_t[:1] - state[:1] 71 | actual_action = dif_to_action(diff) 72 | if actual_action - action == 0: 73 | return 1 74 | else: 75 | return -1 76 | return 0 77 | 78 | 79 | class FxTradeAgent(Agent): 80 | epsilon = 0.5 81 | epsilon_decay = 0.995 82 | train_agent = True 83 | 84 | @classmethod 85 | def after_init(self): 86 | self.sess = tf.Session() 87 | 88 | saver.restore(self.sess, model_path) 89 | # self.sess.run(init_op) 90 | file_writer = tf.summary.FileWriter(log_path, self.sess.graph) 91 | 92 | def get_policy_decision(self, state): 93 | if state is not None: 94 | state = np.reshape(state, (1, obs_length)) 95 | return self.sess.run(sample_op, feed_dict={observations: state}) 96 | return np.argmax(np.random.randint(1, 3, self.action_size)) 97 | 98 | def act(self, state): 99 | # Act epsilon-greedily during training 100 | if self.train_agent is False: 101 | return self.get_policy_decision(state) 102 | else: 103 | if np.random.rand() >= self.epsilon: 104 | return self.get_policy_decision(state) 105 | else: 106 | return np.argmax(np.random.randint(1, 3, self.action_size)) 107 | 108 | def after_memories(self, train_status): 109 | if train_status: 110 | self.epsilon = self.epsilon * self.epsilon_decay 111 | 112 | def replay(self, memories): 113 | 114 | for state_t_pre, action_t_pre, reward_t_pre, state_t, done in memories: 115 | # if action_t_pre !=0: 116 | # print(action_t_pre) 117 | 118 | action_t_pre = np.array(action_t_pre) 119 | reward_t_pre = np.array(reward_t_pre) 120 | state_t_pre = np.array(state_t_pre) 121 | # Reshape Inputs 122 | action_t_pre = np.reshape(action_t_pre, (1,)) 123 | reward_t_pre = np.reshape(reward_t_pre, (1,)) 124 | state_t_pre
= np.reshape(state_t_pre, (1, state_t_pre.shape[0])) 125 | 126 | # print(state_t_pre, action_t_pre, reward_t_pre, state_t) 127 | 128 | feed_dict = { 129 | rewards: reward_t_pre, 130 | observations: state_t_pre, 131 | actions: action_t_pre 132 | } 133 | self.sess.run(train_op, feed_dict=feed_dict) 134 | 135 | saver.save(self.sess, save_path=model_path) 136 | 137 | with tf.name_scope('cross_entropy'): 138 | tf.summary.scalar('cross_entropy', cross_entropies) 139 | 140 | with tf.name_scope('accuracy'): 141 | with tf.name_scope('correct_prediction'): 142 | correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Ylogits, 1)) 143 | with tf.name_scope('accuracy'): 144 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 145 | tf.summary.scalar('accuracy', accuracy) 146 | merged = tf.summary.merge_all() 147 | 148 | # file_writer = tf.summary.FileWriter(log_path, self.sess.graph) 149 | file_writer = tf.summary.FileWriter(log_path) 150 | 151 | 152 | pair_name = "EURUSD" 153 | interval = 1 154 | 155 | future_state = 4 156 | state_size = 47 157 | action_size = 3 158 | considering_steps = 15 159 | 160 | rsi_range = [14] 161 | tsi_range = [14, 29, 58, 100] 162 | emi_range = [3, 89] 163 | aroon_range = [3, 21, 89] 164 | dpo_range = [3, 21, 89] 165 | 166 | chunk_size = 2e4 167 | 168 | fx_agent = FxTradeAgent(max_length=20000) 169 | 170 | data_frames = get_data_chunk(pair_name, interval, 171 | chunk_size=chunk_size) 172 | 173 | playground_step = 0 174 | 175 | for data_frame in data_frames: 176 | print("\n----Start Processing Another Chunk of Data ----") 177 | print(data_frame.head(1)) 178 | print(data_frame.tail(1)) 179 | print("----") 180 | df = create_data_frame(data_frame, 181 | considering_steps=considering_steps, 182 | rsi_range=rsi_range, 183 | tsi_range=tsi_range, 184 | emi_range=emi_range, 185 | aroon_range=aroon_range, 186 | dpo_range=dpo_range) 187 | print("---Data Summary---") 188 | print(df.head()) 189 | print(df.tail()) 190 | print(f"Before Process {len(data_frame)}") 191 | print(f"After Process {len(df)}") 192 | print("\n") 193 | fx_env = FxEnv(df.values) 194 | 195 | # print(state) 196 | pl = PlayGround(env=fx_env, 197 | agent=fx_agent, 198 | time_frame=1, 199 | playground_step=playground_step) 200 | pl.play() 201 | playground_step += 1 202 | --------------------------------------------------------------------------------
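A note on running trading_agent.py: it imports `get_data_chunk` from `data.data_manager`, but the `data/` package is excluded by .gitignore, so that module is not in the repository. Below is a minimal sketch of what such a loader might look like — the file naming scheme (`<pair><interval>.mini.csv`, matching the bundled EURUSD1.mini.csv) and the OHLC column names are assumptions, not the author's actual implementation:

```
import pandas as pd


def get_data_chunk(pair_name, interval, chunk_size=2e4, base_dir="."):
    """Yield successive DataFrame chunks of an OHLC price CSV.

    Assumed layout: a file such as EURUSD1.mini.csv with columns
    including Close, High and Low, as expected by create_data_frame().
    """
    path = "{}/{}{}.mini.csv".format(base_dir, pair_name, interval)
    # pandas' chunked reader needs an integer chunksize
    for chunk in pd.read_csv(path, chunksize=int(chunk_size)):
        yield chunk
```

With a loader along these lines placed in `data/data_manager.py`, the chunk loop at the bottom of trading_agent.py streams the sample CSV one block at a time instead of loading it all at once.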