├── .gitignore ├── EURUSD1.mini.csv ├── README.md ├── agent ├── Agent.py └── __init__.py ├── app_worker.py ├── environment ├── EnvPlayer.py ├── Environment.py └── __init__.py ├── main.test.tf.py ├── play_history.py ├── preprocess ├── __init__.py ├── data_pre_process.py └── pattern_encoder.py ├── requirments.txt └── trading_agent.py
/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | config/ 3 | logs/ 4 | model_saved/ 5 | .idea 6 | *.iml --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # data_stream_gym 2 | 3 | Use this simple lib to implement RL methods over a data stream with just a few lines of code. 4 | 5 | ## Create environment 6 | 7 | ``` 8 | class FxEnv(TradeEnvironment): 9 | 10 | @classmethod 11 | def __reward__(self, state, action, next_state): 12 | return -1 if action > 0 else 1 13 | ``` 14 | For further development you need to implement your own reward function. 15 | 16 | Override 17 | def __reward__(self, state, action, next_state) 18 | 19 | 20 | 21 | 22 | ## Create Agent 23 | 24 | ``` 25 | 26 | class FxTradeAgent(Agent): 27 | 28 | def act(self, state): 29 | return np.argmax(np.random.randint(1, 3, self.action_size)) 30 | 31 | @classmethod 32 | def replay(self, memories): 33 | print(len(memories)) 34 | 35 | ``` 36 | 37 | ## Train the algorithm 38 | 39 | ``` 40 | # Create Environment with data values 41 | fx_env = FxEnv(df.values) 42 | pl = PlayGround(env=fx_env, agent=FxTradeAgent(), time_frame=4) 43 | pl.play() 44 | ``` 45 | 46 | ## Licence 47 | MIT 48 | 49 | --------------------------------------------------------------------------------
/agent/Agent.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | 3 | import random 4 | 5 | 6 | class Agent(object): 7 | 8 | def __init__(self, 9 | action_size=3, 10 | max_length=1000, replay_prob=0.995, forget_rate=0.25): 11 | self.action_size = action_size 12 | self.forget_rate = forget_rate 13 | self.replay_prob = replay_prob 14 | self.max_length = max_length 15 | self.__memory__ = deque(maxlen=self.max_length) 16 | self.after_init() 17 | 18 | @classmethod 19 | def after_init(self): 20 | pass 21 | 22 | @classmethod 23 | def act(self, state): 24 | return -1 25 | 26 | @classmethod 27 | def replay(self, memories): 28 | pass 29 | 30 | @classmethod 31 | def after_memories(self, train_status): 32 | pass 33 | 34 | def memorize(self, state_t, action_t, reward_t, state_t_next, done): 35 | self.__memory__.append((state_t, action_t, reward_t, state_t_next, done)) 36 | evaluate_request = False 37 | if len(self.__memory__) == self.__memory__.maxlen: 38 | if random.uniform(0, 1) <= self.replay_prob: 39 | self.replay(self.__memory__) 40 | 41 | # Randomly forget a fraction of the memories 42 | for f in range(int(len(self.__memory__) * (random.uniform(0, self.forget_rate)))): 43 | self.__memory__.pop() 44 | print("Remaining memory length {}".format(len(self.__memory__))) 45 | 46 | self.__memory__.pop() # forget the newest entry (deque.pop() removes from the right) 47 | evaluate_request = True 48 | self.after_memories(evaluate_request) 49 | 50 | return evaluate_request 51 | --------------------------------------------------------------------------------
/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceylon-ai-projects/data_stream_gym/eae10bdf157235b73de36c8f16e3d25e212ca22f/agent/__init__.py --------------------------------------------------------------------------------
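The `memorize`/`replay` cycle in `Agent` is easiest to see with a small, self-contained subclass. The sketch below is illustrative only — the `RandomAgent` name, the tiny 10-item buffer, and the dummy 4-element states are assumptions, not part of the library. It fills the memory deque until `max_length` is reached, at which point `replay` fires with probability `replay_prob` and a random fraction (up to `forget_rate`) of the buffer is dropped:

```
import numpy as np

from agent.Agent import Agent


class RandomAgent(Agent):

    def act(self, state):
        # Uniform random action index in [0, action_size)
        return np.random.randint(self.action_size)

    def replay(self, memories):
        # Invoked by memorize() once the deque is full
        print("replay over {} transitions".format(len(memories)))


agent = RandomAgent(action_size=3, max_length=10, replay_prob=1.0)
for step in range(30):
    state = np.random.rand(4)  # dummy state vector
    action = agent.act(state)
    reward = 1.0 if action == 2 else -1.0
    # memorize() returns True whenever a replay/forget pass just ran
    if agent.memorize(state, action, reward, state, False):
        print("buffer replayed and partially forgotten at step {}".format(step))
```

Note that `deque.pop()` removes the most recent transition, so the forgetting pass trims the newest memories rather than the oldest.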
/app_worker.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | import csv 3 | 4 | app = Celery('tasks', broker='redis://localhost:6379/') 5 | 6 | 7 | @app.task 8 | def write_data_hist(x, y): 9 | with open(r'reward_history.csv', 'a', newline='') as csvfile: 10 | fieldnames = ['x', 'y'] 11 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 12 | writer.writerow({'x': x, 'y': y}) --------------------------------------------------------------------------------
/environment/EnvPlayer.py: -------------------------------------------------------------------------------- 1 | from app_worker import write_data_hist 2 | 3 | 4 | class PlayGround(object): 5 | reward_history = [] 6 | 7 | def __init__(self, env, agent, time_frame=4, playground_step=0): 8 | self.playground_step = playground_step 9 | self.time_frame = time_frame 10 | self.env = env 11 | self.agent = agent 12 | 13 | def play(self): 14 | done = False 15 | state_t_pre = None 16 | action_t_pre = None 17 | steps = 0 18 | agent_act = False 19 | reward_records = 0 20 | while not done: 21 | state_t, done = self.env.get_next_state() 22 | 23 | step_ = self.playground_step + steps 24 | 25 | if steps % self.time_frame == 0: 26 | if agent_act: 27 | reward_t_pre = self.env.calculate_reward(state_t_pre, action_t_pre, state_t) 28 | 29 | # Record history 30 | 31 | # print(state_t_pre[:1], action_t_pre, state_t[:1], reward_t_pre) 32 | # print(step_) 33 | 34 | if step_ % 10 == 0: 35 | write_data_hist.delay(step_, reward_records / steps) 36 | reward_records += reward_t_pre 37 | self.agent.memorize(state_t_pre, action_t_pre, reward_t_pre, state_t, done) 38 | agent_act = False 39 | 40 | if agent_act is False: 41 | action_t = self.agent.act(state_t) 42 | # print(action_t) 43 | agent_act = True 44 | action_t_pre = action_t 45 | state_t_pre = state_t 46 | 47 | steps += 1 --------------------------------------------------------------------------------
/environment/Environment.py: -------------------------------------------------------------------------------- 1 | class TradeEnvironment(object): 2 | 3 | def __init__(self, data): 4 | self.__data_feed = iter(data) 5 | self.pre_state = None 6 | 7 | @classmethod 8 | def __reward__(self, state, action, next_state): 9 | return 0 10 | 11 | def calculate_reward(self, state, action, next_state): 12 | reward = self.__reward__(state, action, next_state) 13 | return reward 14 | 15 | def get_next_state(self): 16 | ''' 17 | Fetch the next state from the data feed. 18 | :return: 19 | state_next - the next state from the stream, 20 | or None when the stream is exhausted 21 | done - True if the data stream has ended 22 | ''' 23 | try: 24 | state_next = next(self.__data_feed) 25 | except StopIteration: 26 | state_next = None 27 | done = state_next is None 28 | self.pre_state = state_next 29 | return state_next, done --------------------------------------------------------------------------------
/environment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceylon-ai-projects/data_stream_gym/eae10bdf157235b73de36c8f16e3d25e212ca22f/environment/__init__.py --------------------------------------------------------------------------------
/main.test.tf.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import tensorflow as tf 3 | import numpy as np 4 | import pandas as pd 5
| 6 | from sklearn.preprocessing import LabelEncoder 7 | from sklearn.utils import shuffle 8 | from sklearn.model_selection import train_test_split 9 | 10 | base_path = "/home/dewmal/WorkingProjects/ceylon_models/trading/trader/" 11 | 12 | 13 | def read_dataset(): 14 | df = pd.read_csv("{}Sonar.csv".format(base_path)) 15 | print(len(df.columns)) 16 | # Variables 17 | x = df[df.columns[0:60]].values 18 | y = df[df.columns[60]] 19 | 20 | # Encode the dependent variable 21 | encoder = LabelEncoder() 22 | encoder.fit(y) 23 | 24 | y = encoder.transform(y) 25 | y = one_hot_encoder(y) 26 | 27 | print(x.shape) 28 | return (x, y) 29 | 30 | 31 | def one_hot_encoder(labels): 32 | n_labels = len(labels) 33 | n_unique_labels = len(np.unique(labels)) 34 | 35 | one_hot_encode = np.zeros((n_labels, n_unique_labels)) 36 | one_hot_encode[np.arange(n_labels), labels] = 1 37 | return one_hot_encode 38 | 39 | 40 | # Read the dataset 41 | 42 | X, Y = read_dataset() 43 | 44 | # Shuffle the dataset to mix up the rows 45 | X, Y = shuffle(X, Y, random_state=1) 46 | 47 | # Split data set to train and test 48 | train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.20, random_state=415) 49 | 50 | # Inspect The shape of train and testing 51 | print(train_x.shape) 52 | print(train_y.shape) 53 | print(test_x.shape) 54 | 55 | # Define the important params and variables to work with tensor 56 | learning_rate = 0.3 57 | training_epochs = 100 58 | cost_history = [] 59 | n_dim = X.shape[1] 60 | print("n dim", n_dim) 61 | n_class = 2 62 | model_path = "{}model_saved".format(base_path) 63 | 64 | # Define the number of hidden layers and number of neurons for each layer 65 | n_hidden_1 = 60 66 | n_hidden_2 = 60 67 | n_hidden_3 = 60 68 | n_hidden_4 = 60 69 | 70 | x = tf.placeholder(tf.float32, [None, n_dim]) 71 | W = tf.Variable(tf.zeros([n_dim, n_class])) 72 | b = tf.Variable(tf.zeros([n_class])) 73 | y_ = tf.placeholder(tf.float32, [None, n_class]) 74 | 75 | 76 | def multiplayer_perception(x, weights, biases): 77 | # Hidden layer with RELU Activation 78 | layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 79 | layer_1 = tf.nn.relu(layer_1) 80 | 81 | # Hidden layer with sigmoid activation 82 | layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) 83 | layer_2 = tf.nn.sigmoid(layer_2) 84 | 85 | # Hidden layer with sigmoid activation 86 | layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']) 87 | layer_3 = tf.nn.sigmoid(layer_3) 88 | 89 | # Hidden layer with RELU Activation 90 | layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4']) 91 | layer_4 = tf.nn.relu(layer_4) 92 | 93 | out_layer = tf.matmul(layer_4, weights['out']) + biases['out'] 94 | return out_layer 95 | 96 | 97 | weights = { 98 | 'h1': tf.Variable(tf.truncated_normal([n_dim, n_hidden_1])), 99 | 'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])), 100 | 'h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3])), 101 | 'h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_hidden_4])), 102 | 'out': tf.Variable(tf.truncated_normal([n_hidden_4, n_class])), 103 | } 104 | 105 | biases = { 106 | 'b1': tf.Variable(tf.truncated_normal([n_hidden_1])), 107 | 'b2': tf.Variable(tf.truncated_normal([n_hidden_2])), 108 | 'b3': tf.Variable(tf.truncated_normal([n_hidden_3])), 109 | 'b4': tf.Variable(tf.truncated_normal([n_hidden_4])), 110 | 'out': tf.Variable(tf.truncated_normal([n_class])), 111 | } 112 | 113 | # Initialize all the variables 114 | 115 | init = tf.global_variables_initializer() 116 | 117 | saver 
= tf.train.Saver() 118 | 119 | # Call your model defined 120 | y = multiplayer_perception(x, weights, biases) 121 | 122 | # Define cost function and optimizer 123 | cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)) 124 | training_steps = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function) 125 | 126 | sess = tf.Session() 127 | sess.run(init) 128 | 129 | # Calculate the cost and the accuracy for each epoch 130 | 131 | mse_history = [] 132 | accuracy_history = [] 133 | 134 | for epoch in range(training_epochs): 135 | sess.run(training_steps, feed_dict={x: train_x, y_: train_y}) 136 | cost = sess.run(cost_function, feed_dict={x: train_x, y_: train_y}) 137 | cost_history.append(cost) 138 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 139 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 140 | # 141 | pred_y = sess.run(y, feed_dict={x: test_x}) 142 | mse = tf.reduce_mean(tf.square(pred_y - test_y)) 143 | mse = sess.run(mse) 144 | mse_history.append(mse) 145 | 146 | accuracy = sess.run(accuracy, feed_dict={x: train_x, y_: train_y}) 147 | accuracy_history.append(accuracy) 148 | 149 | print("Epoch : ", epoch, ' - ', cost, ' cost:', "-MSE:", mse, "- Train Accuraccy:", accuracy) 150 | 151 | save_path = saver.save(sess, model_path) 152 | print("Model saved in file: %s" % save_path) 153 | 154 | # Plot mse and acc 155 | plt.plot(mse_history, label="MSE") 156 | plt.show() 157 | plt.plot(accuracy_history, label="Acc") 158 | plt.show() 159 | -------------------------------------------------------------------------------- /play_history.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import seaborn as sns 4 | 5 | sns.set(style="whitegrid") 6 | # plt.ion() ## Note this correction 7 | # fig = plt.figure() 8 | df = pd.read_csv("reward_history.csv") 9 | x = df.iloc[-100:, 1:].values 10 | sns.distplot(x) 11 | plt.show() 12 | 13 | plt.plot(df.iloc[:, 1].values) 14 | plt.show() -------------------------------------------------------------------------------- /preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceylon-ai-projects/data_stream_gym/eae10bdf157235b73de36c8f16e3d25e212ca22f/preprocess/__init__.py -------------------------------------------------------------------------------- /preprocess/data_pre_process.py: -------------------------------------------------------------------------------- 1 | from preprocess.pattern_encoder import encode_column_to_range_index, decode_column_to_int 2 | 3 | from ta import * 4 | 5 | 6 | def process_change_series(close_s, step_back_s): 7 | series = (step_back_s - close_s) * 100 / close_s 8 | # print(series[-1:]) 9 | return series.apply(encode_column_to_range_index) 10 | 11 | 12 | def create_data_frame(data_csv, considering_steps=15, 13 | rsi_range=[14, 29, 58, 100], 14 | tsi_range=[14, 29, 58, 100], 15 | emi_range=[9, 11, 20, 100], 16 | aroon_range=[9, 13, 29, 50], 17 | dpo_range=[4, 5, 13, 35]): 18 | data_csv['Ct'] = data_csv.Close.shift(considering_steps) 19 | data_csv.dropna(inplace=True) 20 | # print(data_csv.head()) 21 | df = pd.DataFrame() 22 | close_s = data_csv.Close 23 | df['C'] = close_s 24 | 25 | for rsi_i in rsi_range: 26 | df['RSI({})'.format(rsi_i)] = rsi(close_s) / 100 27 | 28 | for atr_i in tsi_range: 29 | df['ATR({})'.format(atr_i)] = average_true_range(data_csv.High, 
data_csv.Low, close_s, n=atr_i) 30 | 31 | for ema_i in emi_range: 32 | df['exp({})'.format(ema_i)] = ema(close_s, ema_i) 33 | 34 | for aron_i in aroon_range: 35 | df['arn_d({})'.format(aron_i)] = aroon_down(close_s, n=aron_i) 36 | df['arn_u({})'.format(aron_i)] = aroon_up(close_s, n=aron_i) 37 | # 38 | for dpo_i in dpo_range: 39 | df['dpo({})'.format(dpo_i)] = ema(data_csv.Close, dpo_i) 40 | 41 | # Pattern 42 | series = (close_s.shift(1) - close_s) * 100 / close_s 43 | series = series.apply(encode_column_to_range_index) 44 | df['P1'] = series 45 | df['P2'] = series 46 | df['P3'] = series 47 | df['P4'] = series 48 | # 49 | for back_step in range(2, (considering_steps - 1) + 1): 50 | df['P1'] += process_change_series(close_s, close_s.shift(back_step)) 51 | 52 | # 53 | for back_step in range(2, 5): 54 | df['P2'] += process_change_series(close_s, close_s.shift(back_step)) 55 | 56 | # 57 | for back_step in range(2, 4): 58 | df['P3'] += process_change_series(close_s, close_s.shift(back_step)) 59 | 60 | for back_step in range(2, 10): 61 | df['P4'] += process_change_series(close_s, close_s.shift(back_step)) 62 | # print(df['P']) 63 | df.dropna(inplace=True) 64 | # print(df.values[-10:, -4:]) 65 | df['P1'] = df.P1.apply(decode_column_to_int) 66 | df['P2'] = df.P2.apply(decode_column_to_int) 67 | df['P3'] = df.P3.apply(decode_column_to_int) 68 | df['P4'] = df.P4.apply(decode_column_to_int) 69 | 70 | return df 71 | --------------------------------------------------------------------------------
/preprocess/pattern_encoder.py: -------------------------------------------------------------------------------- 1 | from baseconv import BaseConverter 2 | 3 | x = 0.01 4 | encoder_base = BaseConverter('USD') 5 | 6 | filter_ranges = [ 7 | {'range': (-100, -3 * x), 'label': 'D'}, 8 | {'range': (-3 * x, -2 * x), 'label': 'D'}, 9 | {'range': (-2 * x, -x), 'label': 'D'}, 10 | {'range': (-x, x), 'label': 'S'}, 11 | {'range': (x, 2 * x), 'label': 'U'}, 12 | {'range': (2 * x, 3 * x), 'label': 'U'}, 13 | {'range': (3 * x, 100), 'label': 'U'}, 14 | ] 15 | 16 | 17 | def pattern_to_action(pattern): 18 | pt_i = pattern[:1][0] 19 | pt_t = pattern[-1:][0] 20 | pt = pt_i + pt_t 21 | # print(pt) 22 | if pt == 'UU' or pt == 'SU': 23 | return 2 24 | elif pt == 'DD' or pt == 'SD': 25 | return 0 26 | else: 27 | return 1 28 | 29 | 30 | def encode_column_to_range_index(x, i=0, alpha=0.001): 31 | # print(x,i) 32 | for f in filter_ranges: 33 | if f['range'][0] <= x + (alpha * i) <= f['range'][1]: 34 | return f['label'] 35 | # print("None",x) 36 | return "U" 37 | 38 | 39 | def decode_column_to_int(x): 40 | return int(encoder_base.decode(x)) / float(encoder_base.decode("".rjust(len(x), 'D'))) 41 | 42 | 43 | 44 | if __name__ == '__main__': 45 | encode_column_to_range_index(-0.02) 46 | decode_column_to_int("UUUDSDSSS") 47 | encoder_base.decode("DDDDDDDDD") --------------------------------------------------------------------------------
/requirments.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | matplotlib 3 | tensorflow-gpu 4 | numpy 5 | torchvision 6 | gym 7 | torch 8 | ta==0.2.0 9 | celery 10 | redis 11 | scikit-learn 12 | seaborn 13 | python-baseconv --------------------------------------------------------------------------------
/trading_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from agent.Agent import Agent 4 | from data.data_manager import get_data_chunk 5 | from environment.EnvPlayer import PlayGround 6 | from environment.Environment import TradeEnvironment 7 | from preprocess.data_pre_process
import create_data_frame 8 | 9 | import tensorflow as tf 10 | import os 11 | 12 | dirname = os.path.dirname(__file__) 13 | base_path = dirname + "/model_saved" 14 | model_path = "{}/model".format(base_path) 15 | 16 | base_path = dirname + "/logs" 17 | log_path = "{}".format(base_path) 18 | 19 | print("Model path => {}".format(model_path)) 20 | print("Log path => {}".format(log_path)) 21 | 22 | obs_length = 21 23 | action_size = 3 24 | 25 | # Observation input (1 x obs_length) 26 | observations = tf.placeholder(shape=[1, obs_length], dtype=tf.float32) 27 | # 0, 1, 2 = SELL, STAY, BUY 28 | actions = tf.placeholder(shape=[None], dtype=tf.int32) 29 | # +1 / -1 reward for each remembered action 30 | rewards = tf.placeholder(shape=[None], dtype=tf.float32) 31 | 32 | # model 33 | Y = tf.layers.dense(observations, 200, activation=tf.nn.relu) 34 | Y = tf.layers.dense(Y, 100, activation=tf.nn.relu) 35 | Y = tf.layers.dense(Y, 50, activation=tf.nn.relu) 36 | Ylogits = tf.layers.dense(Y, action_size) 37 | 38 | # Sample an action from predicted probabilities 39 | sample_op = tf.multinomial(logits=tf.reshape(Ylogits, shape=(1, 3)), num_samples=1) 40 | 41 | # loss 42 | cross_entropies = tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(actions, action_size), 43 | logits=Ylogits) 44 | 45 | loss = tf.reduce_sum(rewards * cross_entropies) 46 | 47 | # Training Operation 48 | optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001, decay=0.99) 49 | train_op = optimizer.minimize(loss) 50 | 51 | init_op = tf.global_variables_initializer() 52 | 53 | saver = tf.train.Saver() 54 | 55 | 56 | def dif_to_action(diff): 57 | if diff < 0: 58 | return 0 # Sell 59 | elif diff == 0: 60 | return 1 # Stay 61 | else: 62 | return 2 # Buy 63 | 64 | 65 | class FxEnv(TradeEnvironment): 66 | 67 | @classmethod 68 | def __reward__(self, state, action, state_t): 69 | if state_t is not None and state is not None: 70 | diff = state_t[:1] - state[:1] 71 | actual_action = dif_to_action(diff) 72 | if actual_action - action == 0: 73 | return 1 74 | else: 75 | return -1 76 | return 0 77 | 78 | 79 | class FxTradeAgent(Agent): 80 | epsilon = 0.5 81 | epsilon_decay = 0.995 82 | train_agent = True 83 | 84 | @classmethod 85 | def after_init(self): 86 | self.sess = tf.Session() 87 | 88 | saver.restore(self.sess, model_path) 89 | # self.sess.run(init_op) 90 | file_writer = tf.summary.FileWriter(log_path, self.sess.graph) 91 | 92 | def get_policy_decision(self, state): 93 | if state is not None: 94 | state = np.reshape(state, (1, obs_length)) 95 | return self.sess.run(sample_op, feed_dict={observations: state}) 96 | return np.argmax(np.random.randint(1, 3, self.action_size)) 97 | 98 | def act(self, state): 99 | # Act epsilon-greedily during training 100 | if self.train_agent is False: 101 | return self.get_policy_decision(state) 102 | else: 103 | if np.random.rand() >= self.epsilon: 104 | return self.get_policy_decision(state) 105 | else: 106 | return np.argmax(np.random.randint(1, 3, self.action_size)) 107 | 108 | def after_memories(self, train_status): 109 | if train_status: 110 | self.epsilon = self.epsilon * self.epsilon_decay 111 | 112 | def replay(self, memories): 113 | 114 | for state_t_pre, action_t_pre, reward_t_pre, state_t, done in memories: 115 | # if action_t_pre !=0: 116 | # print(action_t_pre) 117 | 118 | action_t_pre = np.array(action_t_pre) 119 | reward_t_pre = np.array(reward_t_pre) 120 | state_t_pre = np.array(state_t_pre) 121 | # Reshape Inputs 122 | action_t_pre = np.reshape(action_t_pre, (1,)) 123 | reward_t_pre = np.reshape(reward_t_pre, (1,)) 124 | state_t_pre
= np.reshape(state_t_pre, (1, state_t_pre.shape[0])) 125 | 126 | # print(state_t_pre, action_t_pre, reward_t_pre, state_t) 127 | 128 | feed_dict = { 129 | rewards: reward_t_pre, 130 | observations: state_t_pre, 131 | actions: action_t_pre 132 | } 133 | self.sess.run(train_op, feed_dict=feed_dict) 134 | 135 | saver.save(self.sess, save_path=model_path) 136 | 137 | with tf.name_scope('cross_entropy'): 138 | tf.summary.scalar('cross_entropy', cross_entropies) 139 | 140 | with tf.name_scope('accuracy'): 141 | with tf.name_scope('correct_prediction'): 142 | correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Ylogits, 1)) 143 | with tf.name_scope('accuracy'): 144 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 145 | tf.summary.scalar('accuracy', accuracy) 146 | merged = tf.summary.merge_all() 147 | 148 | # file_writer = tf.summary.FileWriter(log_path, self.sess.graph) 149 | file_writer = tf.summary.FileWriter(log_path) 150 | 151 | 152 | pair_name = "EURUSD" 153 | interval = 1 154 | 155 | future_state = 4 156 | state_size = 47 157 | action_size = 3 158 | considering_steps = 15 159 | 160 | rsi_range = [14] 161 | tsi_range = [14, 29, 58, 100] 162 | emi_range = [3, 89] 163 | aroon_range = [3, 21, 89] 164 | dpo_range = [3, 21, 89] 165 | 166 | chunk_size = 2e4 167 | 168 | fx_agent = FxTradeAgent(max_length=20000) 169 | 170 | data_frames = get_data_chunk(pair_name, interval, 171 | chunk_size=chunk_size) 172 | 173 | playground_step = 0 174 | 175 | for data_frame in data_frames: 176 | print("\n----Start Processing Another Chunk of Data ----") 177 | print(data_frame.head(1)) 178 | print(data_frame.tail(1)) 179 | print("----") 180 | df = create_data_frame(data_frame, 181 | considering_steps=considering_steps, 182 | rsi_range=rsi_range, 183 | tsi_range=tsi_range, 184 | emi_range=emi_range, 185 | aroon_range=aroon_range, 186 | dpo_range=dpo_range) 187 | print("---Data Summary---") 188 | print(df.head()) 189 | print(df.tail()) 190 | print(f"Before Process {len(data_frame)}") 191 | print(f"After Process {len(df)}") 192 | print("\n") 193 | fx_env = FxEnv(df.values) 194 | 195 | # print(state) 196 | pl = PlayGround(env=fx_env, 197 | agent=fx_agent, 198 | time_frame=1, 199 | playground_step=playground_step) 200 | pl.play() 201 | playground_step += 1 202 | --------------------------------------------------------------------------------
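A note on running trading_agent.py: it imports `get_data_chunk` from `data.data_manager`, but the `data/` package is excluded by .gitignore, so that module is not in the repository. Below is a minimal sketch of what such a loader might look like — the file naming scheme (`<pair><interval>.mini.csv`, matching the bundled EURUSD1.mini.csv) and the OHLC column names are assumptions, not the author's actual implementation:

```
import pandas as pd


def get_data_chunk(pair_name, interval, chunk_size=2e4, base_dir="."):
    """Yield successive DataFrame chunks of an OHLC price CSV.

    Assumed layout: a file such as EURUSD1.mini.csv with columns
    including Close, High and Low, as expected by create_data_frame().
    """
    path = "{}/{}{}.mini.csv".format(base_dir, pair_name, interval)
    # pandas' chunked reader needs an integer chunksize
    for chunk in pd.read_csv(path, chunksize=int(chunk_size)):
        yield chunk
```

With a loader along these lines placed in `data/data_manager.py`, the chunk loop at the bottom of trading_agent.py streams the sample CSV one block at a time instead of loading it all at once.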