# /Evaluate.py
"""
Run one generation of the genetic algorithm.

Loads the price data and the stored population, scores every trader with a
trading simulation, breeds the next generation and writes it back to
populations/population.json.
"""
import json
import pickle
import time

import numpy as np

import GA_Tools as ga
import GeneMap as gmap
import Tools as tools
import TradingSimulation as sim

# Development aid kept from the original script: re-execute the project
# modules so that edits are picked up when re-running in a live interpreter.
reload(ga)
reload(gmap)
reload(tools)
reload(sim)


pop_size = 25           # Size of population
retain_rate = 0.2       # Share of the fittest traders copied over unchanged
gen_random_pop = False  # True: overwrite the stored population with a random one
min_fitness = 1e-6      # Selection weight given to traders that did not make money

#############
# Load Data #
#############

# Pickles are binary: open in 'rb' and close the handle deterministically.
# NOTE: pickle.load executes arbitrary code; only use a trusted data file.
with open('usdjpy_data.obj', 'rb') as f:
    all_data = pickle.load(f)
df = all_data[all_data.Volume > 0]

# df = df[:200000]  # Smaller data set to test and run faster
df = sim.collapse_data(df, 30)

#######################
# Generate Population #
#######################
SignalsMap = gmap.SignalsMap
StopsMap = gmap.StopsMap
prob_enabled = 0.1

if gen_random_pop:
    tools.gen_random_population(pop_size, SignalsMap, StopsMap, prob_enabled,
                                json_file_path='populations/population.json')

with open('populations/population.json') as json_file:
    D = json.load(json_file)

parent_pop = D['population']
iteration = D['iteration']
print("Currently at %s iteration" % iteration)

####################
# Simulate Trading #
####################

simulation_start_time = time.time()
pop_fitness = []
for trader in parent_pop:
    simulation = tools.simulate_strategy(df, trader, SignalsMap,
                                         verbose=False, direction=1)
    profit = simulation.calculate_profits()
    # The original used abs(profit), which gave a trader losing X the same
    # selection weight as one earning X.  Weights must stay positive for
    # ga.cdf, so losers are floored at a tiny weight instead.
    pop_fitness.append(max(profit, min_fitness))
    print("%s %s %s" % (profit, len(simulation.orders_record), pop_fitness[-1]))

print("Simulation took %s seconds" % (time.time() - simulation_start_time))

##########################
# Produce New Population #
##########################

# Retain the best of the population (fittest first).
new_pop = []
indx = np.argsort(pop_fitness)
cutoff = int(len(parent_pop) * retain_rate)
if cutoff > 0:  # indx[-0:] would select everybody, not nobody
    for i in indx[-cutoff:][::-1]:
        new_pop.append(parent_pop[i])

cdf_vals = ga.cdf(pop_fitness)
print(cdf_vals)
while len(new_pop) < len(parent_pop):
    # Fitness-proportional selection of two parents.
    trader1 = ga.choice(parent_pop, cdf_vals)
    trader2 = ga.choice(parent_pop, cdf_vals)
    new_trader = {}

    off_entry = ga.mate_random(trader1['EntrySignals'], trader2['EntrySignals'])
    off_entry = ga.mutate_value_genes(off_entry, SignalsMap)
    off_entry = ga.mutate_enabled_genes(off_entry)
    # An entry chromosome needs at least one enabled gene.
    new_trader['EntrySignals'] = tools.validate_chrom(off_entry)

    off_exit = ga.mate_random(trader1['ExitSignals'], trader2['ExitSignals'])
    off_exit = ga.mutate_value_genes(off_exit, SignalsMap)
    off_exit = ga.mutate_enabled_genes(off_exit)
    new_trader['ExitSignals'] = off_exit

    off_stop = ga.mate_random(trader1['StopSignals'], trader2['StopSignals'])
    # Stop genes live in StopsMap; mutating them against SignalsMap clipped
    # stop distances into the (-0.01, 0.01) range of the price-change genes.
    off_stop = ga.mutate_value_genes(off_stop, StopsMap)
    new_trader['StopSignals'] = off_stop
    new_pop.append(new_trader)

D = {'population': new_pop, 'iteration': iteration + 1}
with open('populations/population.json', 'w') as outputfile:
    json.dump(D, outputfile)
# /GA_Tools.py
"""Selection, crossover and mutation operators for the genetic algorithm."""
from bisect import bisect
import random

import numpy as np


def cdf(weights):
    """
    Returns the cumulative distribution implied by a list of weights.

    Inputs
    ------
    weights: list/array of non-negative numbers
    Returns
    -------
    list with the running sum of the weights divided by their total.
    If every weight is zero a uniform distribution is returned instead of
    dividing by zero.  An empty input gives an empty list.
    """
    n = len(weights)
    total = float(sum(weights))
    if total == 0:
        return [(i + 1.) / n for i in range(n)]

    result = []
    cumsum = 0
    for w in weights:
        cumsum += w
        result.append(cumsum / total)
    return result


def choice(population, cdf_vals):
    """
    Returns a random element of population sampled according
    to the weights cdf_vals (produced by the func cdf)
    Inputs
    ------
    population: list, a list with objects to be sampled from
    cdf_vals: list/array with cdfs (produced by the func cdf)
    Returns
    -------
    An element from the list population
    Raises
    ------
    ValueError if the two inputs differ in length,
    IndexError if population is empty
    """
    if len(population) != len(cdf_vals):
        raise ValueError("population and cdf_vals must have the same length")
    if len(population) == 0:
        raise IndexError("cannot choose from an empty population")
    x = random.random()
    # bisect returns len(cdf_vals) when x is not below the last cdf value
    # (e.g. a cdf that does not end exactly at 1.0); clamp to the last index.
    idx = min(bisect(cdf_vals, x), len(population) - 1)
    return population[idx]


def mate_random(chrom1, chrom2):
    """
    Mates two chromosomes by randomly exchanging each gene element.

    Every element of every gene is taken from chrom1 or chrom2 with equal
    probability.  Returns a tuple of gene tuples.
    Raises ValueError if the parents do not share the same layout.
    """
    if len(chrom1) != len(chrom2):
        raise ValueError("chromosomes must have the same number of genes")

    offspring = []
    for gene1, gene2 in zip(chrom1, chrom2):
        if len(gene1) != len(gene2):
            raise ValueError("matching genes must have the same length")
        child = tuple(a if random.getrandbits(1) == 0 else b
                      for a, b in zip(gene1, gene2))
        offspring.append(child)
    return tuple(offspring)


def truncated_random_normal(val, val_range, share=0.1):
    """
    Returns a random normal draw around val, clipped to val_range
    (used for mutation)
    val_range: (lower, upper) bounds of the result
    share: the share of the range which is equal to the st.dev
    """
    lower, upper = val_range[0], val_range[1]
    sig = share * (upper - lower)
    res = np.random.normal(val, sig)
    return max(min(res, upper), lower)


def mutate_value_genes(chrom, SignalsMap, mutate_prob=1e-3, verbose=False):
    """
    Mutates the value elements of every gene with probability mutate_prob.

    chrom: sequence of genes, each gene = (value, ..., enabled)
    SignalsMap: gene map; SignalsMap[pos][k + 1] is the (lower, upper) range
                of the k-th value of the gene at position pos
    The last element of each gene (the enabled flag) is never touched.
    Returns a tuple of gene tuples.
    """
    mutated_chrom = []
    for pos, original in enumerate(chrom):
        gene = list(original)
        for gene_pos in range(len(gene) - 1):  # skip the trailing enabled flag
            if np.random.uniform() < mutate_prob:
                val_range = SignalsMap[pos][gene_pos + 1]
                val = gene[gene_pos]
                new_val = truncated_random_normal(val, val_range)
                if verbose:
                    print("Mutating at  %s %s" % (pos, gene_pos))
                    print("Changing from  %s  to  %s" % (val, new_val))
                gene[gene_pos] = new_val
        mutated_chrom.append(tuple(gene))
    return tuple(mutated_chrom)


def mutate_enabled_genes(chrom, mutate_prob_on=1e-3, mutate_prob_off=1e-2, verbose=False):
    """
    Randomly flips the enabled flag (last element) of each gene.

    mutate_prob_on: probability that a disabled gene is switched on
    mutate_prob_off: probability that an enabled gene is switched off
    Returns a tuple of gene tuples; flipped flags are plain Python bools so
    the chromosome stays serialisable with json.dump.
    """
    mutated_chrom = []
    for pos, original in enumerate(chrom):
        gene = list(original)
        flip_prob = mutate_prob_off if gene[-1] else mutate_prob_on
        if np.random.uniform() < flip_prob:
            if verbose:
                print("%s  switch from  %s" % (pos, gene[-1]))
            # `not` instead of np.invert: the bitwise inverse of an integer
            # flag (~1 == -2) is still truthy and would never switch off.
            gene[-1] = not gene[-1]
        mutated_chrom.append(tuple(gene))
    return tuple(mutated_chrom)
# /GeneMap.py
"""
Provides a Gene Map for the genes.

Each gene is described by a tuple (func, range_first_arg, range_second_arg)
and a map is a list of such tuples.  Every func takes a single list
[series, first_arg, second_arg, enabled] and returns a truth value; a
disabled gene always evaluates to True.
"""
import copy

import numpy as np


# Map for the genes describing the stops
StopsMap = [
    (None, (5, 100)),  # Stop Loss
    (None, (5, 100)),  # Take Profit
    (None, (5, 500)),  # Maximum hold time (in number of periods)
]

SignalsMap = []  # Map of the signal genes

###########################
# Change From Last Period #
###########################


class Change_One_Period(object):
    """Condition on the relative change between two consecutive periods."""

    def __init__(self, t):
        # t: offset (in periods) from the end of the series
        self.t = t

    def func(self, func_args):
        """
        Returns True if the relative change from series[-t-2] to
        series[-t-1] lies strictly inside (value - error, value + error).
        """
        series, value, error, enabled = func_args
        if not enabled:
            return True
        previous = series[-self.t - 2]
        change = (series[-self.t - 1] - previous) / previous
        return value - error < change < value + error


range_ch_val = (-0.01, 0.01)
range_ch_err = (0, 0.0001)

# Add one condition for each of the last six one-period changes
for t in range(6):
    cls = Change_One_Period(t)
    SignalsMap.append((cls.func, range_ch_val, range_ch_err))

######################################
# Price Level Relative to Hist Price #
######################################

# Checks if the current price is below/above some historical max/min price


class HistValues(object):
    """Condition on a price relative to an aggregate of recent prices."""

    # Use level price series
    def __init__(self, t, n_periods, func_type):
        """
        t : offset (in periods) from the end of the series of the price tested
        n_periods : number of periods over which func_type is evaluated
        func_type : aggregate applied to the window, e.g. np.max or np.min
        """
        self.t = t
        # Callers pass 1e3 (a float); slice bounds must be integers.
        self.n_periods = int(n_periods)
        self.func_type = func_type

    def func(self, func_args):
        """
        Returns True if series[-t-1] divided by the aggregated price of the
        last n_periods lies strictly inside (value - error, value + error).
        """
        series, value, error, enabled = func_args
        if not enabled:
            return True
        price = self.func_type(series[-self.n_periods:])
        x = series[-self.t - 1] / price
        return value - error < x < value + error


pr_n_periods = 1e3
range_pr_val = (0.5, 1.5)
range_pr_err = (0, 1)

cls = HistValues(0, pr_n_periods, np.max)
SignalsMap.append((cls.func, range_pr_val, range_pr_err))

cls = HistValues(0, pr_n_periods, np.min)
SignalsMap.append((cls.func, range_pr_val, range_pr_err))

##############
# Volatility #
##############


class Volatility(object):
    """Condition on the coefficient of variation of recent prices."""

    # Use level price series
    def __init__(self, n_periods):
        # Callers pass 1e3 (a float); slice bounds must be integers.
        self.n_periods = int(n_periods)

    def func(self, func_args):
        """
        Returns True if std / mean of the last n_periods prices lies
        strictly inside (value - error, value + error).
        """
        series, value, error, enabled = func_args
        if not enabled:
            return True
        window = series[-self.n_periods:]
        vol = np.std(window) / np.mean(window)
        return value - error < vol < value + error


vol_n_periods = 1e3
range_vol_val = (0, 2)
range_vol_err = (0, 0.1)

cls = Volatility(vol_n_periods)
SignalsMap.append((cls.func, range_vol_val, range_vol_err))

#######################
# Chartist Indicators #
#######################


class MovAvCross(object):
    """Moving-average crossover condition."""

    def __init__(self, indicator):
        # indicator: "Cross from below" or "Cross from above"
        self.indicator = indicator

    def func(self, func_args):
        """
        Returns True when the MA2-period moving average crosses the
        MA1-period moving average in the configured direction between the
        previous and the current period.

        MA1 and MA2 arrive as floats (they are drawn from a continuous
        range) and are truncated to whole window lengths of at least 1.
        Raises ValueError for an unknown indicator.
        """
        series, MA1, MA2, enabled = func_args
        if not enabled:
            return True
        window1 = max(1, int(MA1))
        window2 = max(1, int(MA2))

        mean1 = np.mean(series[-window1:])
        mean2 = np.mean(series[-window2:])
        mean1_lag = np.mean(series[-window1 - 1:-1])
        mean2_lag = np.mean(series[-window2 - 1:-1])

        # Two float averages are practically never exactly equal, so a
        # crossing is detected as a change of ordering between the previous
        # and the current period (touching counts as having crossed).
        if self.indicator == "Cross from below":
            # MA2 average was below the MA1 average and is no longer below it
            return bool(mean2_lag < mean1_lag and mean2 >= mean1)
        elif self.indicator == "Cross from above":  # sell signal
            return bool(mean2_lag > mean1_lag and mean2 <= mean1)
        raise ValueError(
            "Indicator must be either 'Cross from below' or 'Cross from above', got %r"
            % (self.indicator,))


range_movav_ma1 = (2, 1e3)
range_movav_ma2 = (2, 1e3)

cls = MovAvCross(indicator="Cross from below")
SignalsMap.append((cls.func, range_movav_ma1, range_movav_ma2))

cls = MovAvCross(indicator="Cross from above")
SignalsMap.append((cls.func, range_movav_ma1, range_movav_ma2))
# /Tools.py
"""
Helper functions for the genetic algorithm simulation and estimation
"""
import json

import numpy as np
import pandas as pd

import GeneMap as gmap
import TradingSimulation as sim

# Development aid kept from the original module: re-execute the project
# modules so that edits are picked up in a live interpreter.
reload(gmap)
reload(sim)


def gen_random_chrom(signal_map, prob_enabled):
    """
    Generates a random chromosome based on a signal map
    signal_map: list, each element is a gene.
        Each gene is a tuple(func, range_first_arg, range_second_arg ...)
    prob_enabled: probability that a gene is enabled

    Returns:
        chrom: tuple, each element of chrom is a gene tuple
               gene = (first_arg, second_arg, ...., enabled)
    """
    chrom = []
    for gene in signal_map:
        # The first element in the map is the function; the remaining ones
        # are the (lower, upper) ranges the values are drawn from.
        random_gene = [np.random.uniform(bounds[0], bounds[1]) for bounds in gene[1:]]
        # Plain Python bool so that the gene can be dumped to json.
        random_gene.append(bool(np.random.uniform() < prob_enabled))
        chrom.append(tuple(random_gene))
    return tuple(chrom)


def gen_random_population(pop_size, SignalsMap, StopsMap, prob_enabled, json_file_path):
    """
    Generates pop_size random traders and writes them to json_file_path.

    Each trader is a dict with the keys EntrySignals, ExitSignals and
    StopSignals.  Entry chromosomes are validated so that at least one gene
    is enabled; stop genes are always enabled.  The file receives
    {'population': [...], 'iteration': 0}.

    Returns the list of traders.
    """
    Population = []
    for _ in range(pop_size):
        trader = {}
        entry = gen_random_chrom(SignalsMap, prob_enabled)
        trader['EntrySignals'] = validate_chrom(entry, verbose=True)
        trader['ExitSignals'] = gen_random_chrom(SignalsMap, prob_enabled)
        trader['StopSignals'] = gen_random_chrom(StopsMap, 1.)
        Population.append(trader)

    D = {'population': Population, 'iteration': 0}
    with open(json_file_path, 'w') as outfile:
        json.dump(D, outfile)
    return Population


def evaluate_chrom(series, chrom, signal_map):
    """
    Evaluates every gene of chrom on series.

    The gene at position pos is passed to signal_map[pos][0] as the list
    [series] + gene.  Returns the list of results, one per gene.
    """
    conditions = []
    for pos, gene in enumerate(chrom):
        func = signal_map[pos][0]
        conditions.append(func([series] + list(gene)))
    return conditions


def validate_chrom(chrom, verbose=False):
    """
    If none of the genes are enabled randomly enable one

    Returns chrom itself when it already has an enabled gene (or has no
    genes at all), otherwise a new tuple with one randomly chosen gene
    switched on.
    """
    if len(chrom) == 0 or any(gene[-1] for gene in chrom):
        return chrom

    new_chrom = list(chrom)
    # randint excludes the upper bound; it replaces the deprecated
    # random_integers(0, len(chrom) - 1).
    pos = np.random.randint(0, len(chrom))
    gene = list(chrom[pos])
    gene[-1] = True
    new_chrom[pos] = tuple(gene)
    if verbose:
        print("Chromosome needed validation and was validated")
    return tuple(new_chrom)
def simulate_strategy(data, trader, SignalsMap, start_period=1e3, verbose=False, direction=1):
    """
    Simulates trading for a given trader

    data: data frame with prices
    trader: dict, dictionary with the three keys:
            EntrySignals, ExitSignals and StopSignals
    SignalsMap: gene map used to evaluate entry and exit chromosomes
    start_period: first step that is simulated (truncated to an integer)
    verbose: print progress and trade events
    direction: passed to Simulation.open_position for every entry

    Returns the sim.Simulation object after the last step.
    """
    stops = trader['StopSignals']
    sl_pips = stops[0][0]
    tp_pips = stops[1][0]
    hold_time = stops[2][0]
    simulation = sim.Simulation(data, sl_pips, tp_pips, hold_time)

    end_period = len(simulation.data)

    if verbose:
        ENABLED = [i[-1] for i in trader['EntrySignals']]
        print(ENABLED)
        indx = np.where(np.array(ENABLED) == True)
        # Index the map directly: np.array(SignalsMap) would have to build a
        # ragged array out of functions and range tuples.
        funcs = [SignalsMap[i][0] for i in indx[0]]
        print(funcs)
        print(np.array(trader['EntrySignals'])[indx])

    # The close prices do not change during the run; extract them once
    # instead of slicing the pandas column on every step.
    closes = simulation.data.Close.values

    for step in range(int(start_period), end_period):
        simulation.step = step
        if verbose and step % 100000 == 0:
            print(step)
        series = closes[:step]  # history up to, but excluding, the current step
        if simulation.total_position == 0:
            entry_conditions = evaluate_chrom(series, trader['EntrySignals'], SignalsMap)
            if all(entry_conditions):
                if verbose:
                    print("Open Position")
                simulation.open_position(direction)
        else:  # Exit the position if there is an exit signal or you hit the stops
            exit_conditions = evaluate_chrom(series, trader['ExitSignals'], SignalsMap)
            if all(exit_conditions):
                # NOTE(review): series[-1] is the close of the previous step,
                # while positions are opened at the close of the current
                # step -- confirm this asymmetry is intended.
                simulation.close_position(series[-1])
                if verbose:
                    print("Sell Signal")
            simulation.check_stops(verbose)  # Check if the stops are hit

    return simulation


def gene_info(gene, pos, SignalsMap):
    """
    Returns a human readable description of an enabled gene.

    gene: (first_arg, second_arg, enabled)
    pos: position of the gene in the signal map
    SignalsMap: unused, kept for interface compatibility

    Returns None for a disabled gene or an unknown position.
    """
    if not gene[-1]:
        return None

    if pos <= 5:
        values = (gene[0] - gene[1], gene[0] + gene[1], -pos)
        return "%s < (P[t-1] - P[t-2]) / P[t-2] < %s where t = %s" % values

    lower = gene[0] - gene[1]
    upper = gene[0] + gene[1]
    if pos == 6:
        return " %s < Current price / (Max Price in last %s periods) < %s" % (
            lower, gmap.pr_n_periods, upper)
    if pos == 7:
        return " %s < Current price / (Min Price in last %s periods) < %s" % (
            lower, gmap.pr_n_periods, upper)
    if pos == 8:
        return " %s < Volatility / Average over last %s periods < %s" % (
            lower, gmap.vol_n_periods, upper)
    # Moving-average genes are (MA1, MA2, enabled).  MovAvCross tests the MA2
    # average crossing the MA1 average, so MA2 is named first.
    if pos == 9:
        return " MA(%s) crosses from below MA(%s)" % (gene[1], gene[0])
    if pos == 10:
        return " MA(%s) crosses from above MA(%s)" % (gene[1], gene[0])
    return None


def display_chrom_info(chrom, SignalsMap):
    """
    Returns the descriptions of all enabled genes of chrom, one per line.
    """
    messages = (gene_info(gene, pos, SignalsMap) for pos, gene in enumerate(chrom))
    return "".join(message + "\n" for message in messages if message is not None)
# /TradingSimulation.py
"""Data preparation and a minimal single-position trading simulator."""
import datetime

import numpy as np
import pandas as pd


def load_data(path_to_data):
    """ Loads data in df and parses the Time column into timestamps """
    data = pd.read_csv(path_to_data)
    # time_fmt = '%Y-%m-%d %H:%M:%S'  # use this for the test file
    time_fmt = '%d.%m.%Y %H:%M:%S.%f'  # Use this for the full file
    data['Time'] = pd.to_datetime(data['Time'], format=time_fmt)
    return data


def collapse_data(data, minutes):
    """
    Aggregates one-minute bars into bars of `minutes` minutes.

    data: data frame with the columns Year, Week, Day, Hour, Minute, Close,
          Low, High, Volume, Signed_Volume and Returns2
    minutes: bar length; 1 returns data unchanged

    Rows without volume are dropped.  Within each bar Close is the last
    close, Low the lowest low, High the highest high and the remaining
    columns are summed.  The input frame is not modified.
    """
    minutes = int(minutes)
    if minutes == 1:
        return data

    # no trading at the end of Fridays. Drop these values.  Work on a copy so
    # that the helper column does not leak into the caller's frame.
    data1 = data[data.Volume > 0].copy()
    # Floor division: bucket number of each minute within its hour.
    data1['Minute_Interval'] = data1.Minute.values // minutes

    keys = ['Year', 'Week', 'Day', 'Hour', 'Minute_Interval']
    columns = ['Close', 'Low', 'High', 'Volume', 'Signed_Volume', 'Returns2']
    group = data1.groupby(keys)[columns]

    # The extremes of a bar are the extremes of its members; taking the last
    # Low/High hid every stop that was touched earlier inside the bar.
    df = group.agg({'Close': 'last', 'Low': 'min', 'High': 'max',
                    'Volume': 'sum', 'Signed_Volume': 'sum',
                    'Returns2': 'sum'})
    return df


class Simulation(object):
    """
    Book-keeping for a strategy that holds at most one position at a time.

    orders_record holds one [step, direction, price] entry per order and
    positions_record one [open_step, direction, open_price, close_step,
    close_price] entry per closed position.
    """

    def __init__(self, data, sl_pips, tp_pips, hold_time):
        self.data = data
        self.sl_pips = sl_pips  # stop loss pips
        self.tp_pips = tp_pips  # take profit pips
        self.hold_time = hold_time  # time to hold position (num periods). Close after that
        self.step = 0
        self.pips = 0.01  # price units per pip
        self.orders_record = []  # Record of opened positions
        self.positions_record = []
        self.total_position = 0

    def open_position(self, direction):
        """
        Opens a position at the close of the current step and sets its
        stop loss, take profit and time stop.  Does nothing (returns None)
        when a position is already open.
        """
        if self.total_position != 0:
            return None

        self.direction = direction
        price = self.data['Close'].values[self.step]
        self.stop_loss = price - direction * self.sl_pips * self.pips
        self.take_profit = price + direction * self.tp_pips * self.pips
        self.close_time = self.step + self.hold_time

        self.total_position += direction
        self.orders_record.append([self.step, direction, price])
        self.current_position = [self.step, direction, price]

    def close_position(self, price):
        """
        Closes the open position at price.  Does nothing (returns None)
        when no position is open.
        """
        if self.total_position == 0:
            return None
        self.total_position -= self.direction
        self.orders_record.append([self.step, -self.direction, price])
        self.current_position.extend([self.step, price])
        self.positions_record.append(self.current_position)
        assert self.total_position == 0

    def check_stops(self, verbose=False):
        """
        Closes the open position if the bar of the current step breaches
        the lower stop, the upper stop or the time stop (checked in this
        order; the first one hit wins).
        """
        if self.total_position == 0:
            return None

        min_price_stop = min(self.stop_loss, self.take_profit)
        max_price_stop = max(self.stop_loss, self.take_profit)

        # Return right after a close so that later checks cannot report a
        # second, spurious stop for a position that no longer exists.
        if min_price_stop > self.data['Low'].values[self.step]:
            self.close_position(min_price_stop)
            if verbose:
                print("stop - min price")
            return None

        if max_price_stop < self.data['High'].values[self.step]:
            self.close_position(max_price_stop)
            if verbose:
                print("stop - max price")
            return None

        if self.step >= self.close_time:
            self.close_position(self.data['Close'].values[self.step])
            if verbose:
                print("stop - time")

    def next_period(self):
        """Advances the simulation by one step."""
        self.step += 1

    def calculate_profits(self):
        """
        Returns the total profit of all closed positions.

        Returns 0.1 when fewer than two orders were recorded (no completed
        trade).  A position that is still open at the end is ignored;
        counting its opening order would book the whole price as a loss.
        """
        if len(self.orders_record) < 2:
            return 0.1
        orders = self.orders_record
        if len(orders) % 2:  # trailing order belongs to a still-open position
            orders = orders[:-1]
        return -sum(order[1] * order[2] for order in orders)

    def calculate_profits2(self):
        """
        Returns a Series with the profit of each consecutive pair of orders
        (open, close), indexed by the pair number.
        """
        df = pd.DataFrame(self.orders_record)
        df[3] = df[1] * df[2]
        df[4] = df.index // 2  # floor division: orders 0,1 -> pair 0; 2,3 -> pair 1
        profit = -df[3].groupby(df[4]).sum()
        return profit