├── .gitignore └── src ├── config ├── config_20 ├── config_50 ├── config_100 ├── config_200 ├── config_20_dqn ├── agent_NEAT.py ├── agent_multi.py ├── visualize.py ├── agent_NEAT_p2p.py ├── genome_evaluator.py ├── population_syn.py ├── evolve.py └── q_agent_dcn_ema1020close.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # 
mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | nbproject/project.properties 106 | nbproject/project.xml 107 | nbproject/private/private.xml 108 | -------------------------------------------------------------------------------- /src/config: -------------------------------------------------------------------------------- 1 | # neat-python configuration for the Forex-v0 environment on OpenAI Gym 2 | 3 | [NEAT] 4 | pop_size = 20 5 | # Note: the fitness threshold will never be reached because 6 | # we are controlling the termination ourselves based on simulation performance. 7 | fitness_criterion = max 8 | fitness_threshold = 10000.0 9 | reset_on_extinction = 0 10 | 11 | [AgentGenome] 12 | num_inputs = 38 13 | num_hidden = 0 14 | num_outputs = 3 15 | initial_connection = partial 0.5 16 | feed_forward = True 17 | compatibility_disjoint_coefficient = 1.0 18 | compatibility_weight_coefficient = 1.0 19 | conn_add_prob = 0.15 20 | conn_delete_prob = 0.1 21 | node_add_prob = 0.15 22 | node_delete_prob = 0.1 23 | activation_default = clamped 24 | activation_options = clamped 25 | activation_mutate_rate = 0.0 26 | aggregation_default = sum 27 | aggregation_options = sum 28 | aggregation_mutate_rate = 0.0 29 | bias_init_mean = 0.0 30 | bias_init_stdev = 1.0 31 | bias_replace_rate = 0.02 32 | bias_mutate_rate = 0.8 33 | bias_mutate_power = 0.4 34 | bias_max_value = 30.0 35 | bias_min_value = -30.0 36 | response_init_mean = 1.0 37 | response_init_stdev = 0.0 38 | response_replace_rate = 0.0 39 | response_mutate_rate = 0.1 40 | response_mutate_power = 0.01 41 | response_max_value = 30.0 42 | response_min_value = -30.0 43 | weight_max_value = 30 44 | weight_min_value = -30 45 | weight_init_mean = 0.0 46 | weight_init_stdev = 1.0 47 | weight_mutate_rate = 0.8 48 | weight_replace_rate = 0.02 49 | weight_mutate_power = 0.4 50 | enabled_default = True 51 | enabled_mutate_rate = 0.01 52 | 53 | [DefaultSpeciesSet] 54 | compatibility_threshold = 3.0 55 | 56 | 
[DefaultStagnation] 57 | species_fitness_func = mean 58 | max_stagnation = 15 59 | species_elitism = 4 60 | 61 | [DefaultReproduction] 62 | elitism = 2 63 | survival_threshold = 0.2 64 | -------------------------------------------------------------------------------- /src/config_20: -------------------------------------------------------------------------------- 1 | # neat-python configuration for the Forex-v0 environment on OpenAI Gym 2 | 3 | [NEAT] 4 | pop_size = 20 5 | # Note: the fitness threshold will never be reached because 6 | # we are controlling the termination ourselves based on simulation performance. 7 | fitness_criterion = max 8 | fitness_threshold = 10000.0 9 | reset_on_extinction = 0 10 | 11 | [AgentGenome] 12 | num_inputs = 38 13 | num_hidden = 0 14 | num_outputs = 6 15 | initial_connection = partial 0.5 16 | feed_forward = True 17 | compatibility_disjoint_coefficient = 1.0 18 | compatibility_weight_coefficient = 1.0 19 | conn_add_prob = 0.15 20 | conn_delete_prob = 0.1 21 | node_add_prob = 0.15 22 | node_delete_prob = 0.1 23 | activation_default = clamped 24 | activation_options = clamped 25 | activation_mutate_rate = 0.0 26 | aggregation_default = sum 27 | aggregation_options = sum 28 | aggregation_mutate_rate = 0.0 29 | bias_init_mean = 0.0 30 | bias_init_stdev = 1.0 31 | bias_replace_rate = 0.02 32 | bias_mutate_rate = 0.8 33 | bias_mutate_power = 0.4 34 | bias_max_value = 30.0 35 | bias_min_value = -30.0 36 | response_init_mean = 1.0 37 | response_init_stdev = 0.0 38 | response_replace_rate = 0.0 39 | response_mutate_rate = 0.1 40 | response_mutate_power = 0.01 41 | response_max_value = 30.0 42 | response_min_value = -30.0 43 | weight_max_value = 30 44 | weight_min_value = -30 45 | weight_init_mean = 0.0 46 | weight_init_stdev = 1.0 47 | weight_mutate_rate = 0.8 48 | weight_replace_rate = 0.02 49 | weight_mutate_power = 0.4 50 | enabled_default = True 51 | enabled_mutate_rate = 0.01 52 | 53 | [DefaultSpeciesSet] 54 | compatibility_threshold = 
3.0 55 | 56 | [DefaultStagnation] 57 | species_fitness_func = mean 58 | max_stagnation = 14 59 | species_elitism = 4 60 | 61 | [DefaultReproduction] 62 | elitism = 2 63 | survival_threshold = 0.2 64 | -------------------------------------------------------------------------------- /src/config_50: -------------------------------------------------------------------------------- 1 | # neat-python configuration for the Forex-v0 environment on OpenAI Gym 2 | 3 | [NEAT] 4 | pop_size = 50 5 | # Note: the fitness threshold will never be reached because 6 | # we are controlling the termination ourselves based on simulation performance. 7 | fitness_criterion = max 8 | fitness_threshold = 10000.0 9 | reset_on_extinction = 0 10 | 11 | [AgentGenome] 12 | num_inputs = 38 13 | num_hidden = 0 14 | num_outputs = 3 15 | initial_connection = partial 0.5 16 | feed_forward = True 17 | compatibility_disjoint_coefficient = 1.0 18 | compatibility_weight_coefficient = 1.0 19 | conn_add_prob = 0.15 20 | conn_delete_prob = 0.1 21 | node_add_prob = 0.15 22 | node_delete_prob = 0.1 23 | activation_default = clamped 24 | activation_options = clamped 25 | activation_mutate_rate = 0.0 26 | aggregation_default = sum 27 | aggregation_options = sum 28 | aggregation_mutate_rate = 0.0 29 | bias_init_mean = 0.0 30 | bias_init_stdev = 1.0 31 | bias_replace_rate = 0.02 32 | bias_mutate_rate = 0.8 33 | bias_mutate_power = 0.2 34 | bias_max_value = 30.0 35 | bias_min_value = -30.0 36 | response_init_mean = 1.0 37 | response_init_stdev = 0.0 38 | response_replace_rate = 0.0 39 | response_mutate_rate = 0.1 40 | response_mutate_power = 0.008 41 | response_max_value = 30.0 42 | response_min_value = -30.0 43 | weight_max_value = 30 44 | weight_min_value = -30 45 | weight_init_mean = 0.0 46 | weight_init_stdev = 1.0 47 | weight_mutate_rate = 0.9 48 | weight_replace_rate = 0.02 49 | weight_mutate_power = 0.2 50 | enabled_default = True 51 | enabled_mutate_rate = 0.01 52 | 53 | [DefaultSpeciesSet] 54 | 
compatibility_threshold = 3.0 55 | 56 | [DefaultStagnation] 57 | species_fitness_func = mean 58 | max_stagnation = 12 59 | species_elitism = 6 60 | 61 | [DefaultReproduction] 62 | elitism = 2 63 | survival_threshold = 0.2 64 | -------------------------------------------------------------------------------- /src/config_100: -------------------------------------------------------------------------------- 1 | # neat-python configuration for the Forex-v0 environment on OpenAI Gym 2 | 3 | [NEAT] 4 | pop_size = 100 5 | # Note: the fitness threshold will never be reached because 6 | # we are controlling the termination ourselves based on simulation performance. 7 | fitness_criterion = max 8 | fitness_threshold = 10000.0 9 | reset_on_extinction = 0 10 | 11 | [AgentGenome] 12 | num_inputs = 38 13 | num_hidden = 0 14 | num_outputs = 3 15 | initial_connection = partial 0.5 16 | feed_forward = True 17 | compatibility_disjoint_coefficient = 1.0 18 | compatibility_weight_coefficient = 1.0 19 | conn_add_prob = 0.15 20 | conn_delete_prob = 0.1 21 | node_add_prob = 0.15 22 | node_delete_prob = 0.1 23 | activation_default = clamped 24 | activation_options = clamped 25 | activation_mutate_rate = 0.0 26 | aggregation_default = sum 27 | aggregation_options = sum 28 | aggregation_mutate_rate = 0.0 29 | bias_init_mean = 0.0 30 | bias_init_stdev = 1.0 31 | bias_replace_rate = 0.02 32 | bias_mutate_rate = 0.8 33 | bias_mutate_power = 0.08 34 | bias_max_value = 30.0 35 | bias_min_value = -30.0 36 | response_init_mean = 1.0 37 | response_init_stdev = 0.0 38 | response_replace_rate = 0.0 39 | response_mutate_rate = 0.1 40 | response_mutate_power = 0.005 41 | response_max_value = 30.0 42 | response_min_value = -30.0 43 | weight_max_value = 30 44 | weight_min_value = -30 45 | weight_init_mean = 0.0 46 | weight_init_stdev = 1.0 47 | weight_mutate_rate = 0.95 48 | weight_replace_rate = 0.02 49 | weight_mutate_power = 0.1 50 | enabled_default = True 51 | enabled_mutate_rate = 0.01 52 | 53 | 
[DefaultSpeciesSet] 54 | compatibility_threshold = 3.0 55 | 56 | [DefaultStagnation] 57 | species_fitness_func = mean 58 | max_stagnation = 10 59 | species_elitism = 8 60 | 61 | [DefaultReproduction] 62 | elitism = 3 63 | survival_threshold = 0.2 64 | -------------------------------------------------------------------------------- /src/config_200: -------------------------------------------------------------------------------- 1 | # neat-python configuration for the Forex-v0 environment on OpenAI Gym 2 | 3 | [NEAT] 4 | pop_size = 200 5 | # Note: the fitness threshold will never be reached because 6 | # we are controlling the termination ourselves based on simulation performance. 7 | fitness_criterion = max 8 | fitness_threshold = 10000.0 9 | reset_on_extinction = 0 10 | 11 | [AgentGenome] 12 | num_inputs = 38 13 | num_hidden = 0 14 | num_outputs = 3 15 | initial_connection = partial 0.5 16 | feed_forward = True 17 | compatibility_disjoint_coefficient = 1.0 18 | compatibility_weight_coefficient = 1.0 19 | conn_add_prob = 0.15 20 | conn_delete_prob = 0.1 21 | node_add_prob = 0.15 22 | node_delete_prob = 0.1 23 | activation_default = clamped 24 | activation_options = clamped 25 | activation_mutate_rate = 0.0 26 | aggregation_default = sum 27 | aggregation_options = sum 28 | aggregation_mutate_rate = 0.0 29 | bias_init_mean = 0.0 30 | bias_init_stdev = 1.0 31 | bias_replace_rate = 0.02 32 | bias_mutate_rate = 0.8 33 | bias_mutate_power = 0.04 34 | bias_max_value = 30.0 35 | bias_min_value = -30.0 36 | response_init_mean = 1.0 37 | response_init_stdev = 0.0 38 | response_replace_rate = 0.0 39 | response_mutate_rate = 0.1 40 | response_mutate_power = 0.001 41 | response_max_value = 30.0 42 | response_min_value = -30.0 43 | weight_max_value = 30 44 | weight_min_value = -30 45 | weight_init_mean = 0.0 46 | weight_init_stdev = 1.0 47 | weight_mutate_rate = 0.97 48 | weight_replace_rate = 0.02 49 | weight_mutate_power = 0.05 50 | enabled_default = True 51 | 
enabled_mutate_rate = 0.01 52 | 53 | [DefaultSpeciesSet] 54 | compatibility_threshold = 3.0 55 | 56 | [DefaultStagnation] 57 | species_fitness_func = mean 58 | max_stagnation = 10 59 | species_elitism = 8 60 | 61 | [DefaultReproduction] 62 | elitism = 3 63 | survival_threshold = 0.2 64 | -------------------------------------------------------------------------------- /src/config_20_dqn: -------------------------------------------------------------------------------- 1 | # neat-python configuration for the Forex-v0 environment on OpenAI Gym 2 | 3 | [NEAT] 4 | pop_size = 20 5 | # Note: the fitness threshold will never be reached because 6 | # we are controlling the termination ourselves based on simulation performance. 7 | fitness_criterion = max 8 | fitness_threshold = 10000.0 9 | reset_on_extinction = 0 10 | 11 | [AgentGenome] 12 | num_inputs = 38 13 | num_hidden = 0 14 | num_outputs = 6 15 | initial_connection = partial 0.5 16 | feed_forward = True 17 | compatibility_disjoint_coefficient = 1.0 18 | compatibility_weight_coefficient = 1.0 19 | conn_add_prob = 0 20 | conn_delete_prob = 0 21 | node_add_prob = 0.2 22 | node_delete_prob = 0.1 23 | activation_default = sigmoid 24 | activation_options = sigmoid,relu 25 | activation_mutate_rate = 0.15 26 | aggregation_default = sum 27 | aggregation_options = sum,product 28 | aggregation_mutate_rate = 0.15 29 | bias_init_mean = 0.0 30 | bias_init_stdev = 0.0 31 | bias_replace_rate = 0.0 32 | bias_mutate_rate = 0.0 33 | bias_mutate_power = 0.0 34 | bias_max_value = 1.0 35 | bias_min_value = 0.1 36 | response_init_mean = 0.5 37 | response_init_stdev = 0.5 38 | response_replace_rate = 0.0 39 | response_mutate_rate = 0.4 40 | response_mutate_power = 0.4 41 | response_max_value = 1.0 42 | response_min_value = 0.1 43 | weight_max_value = 1.0 44 | weight_min_value = 0.1 45 | weight_init_mean = 0.5 46 | weight_init_stdev = 0.5 47 | weight_mutate_rate = 0.8 48 | weight_replace_rate = 0.02 49 | weight_mutate_power = 0.5 50 | 
enabled_default = True 51 | enabled_mutate_rate = 0.01 52 | 53 | [DefaultSpeciesSet] 54 | compatibility_threshold = 3.0 55 | 56 | [DefaultStagnation] 57 | species_fitness_func = mean 58 | max_stagnation = 14 59 | species_elitism = 4 60 | 61 | [DefaultReproduction] 62 | elitism = 2 63 | survival_threshold = 0.2 64 | -------------------------------------------------------------------------------- /src/agent_NEAT.py: -------------------------------------------------------------------------------- 1 | # ANN evolved with NEAT as agent that uses the ForexEnv4 environment 2 | from __future__ import print_function 3 | from copy import deepcopy 4 | from gym.envs.registration import register 5 | #from population_syn import PopulationSyn # extended neat population for synchronizing with singularity p2p network 6 | from genome_evaluator import GenomeEvaluator 7 | import gym 8 | import sys 9 | import neat 10 | import os 11 | import pickle 12 | import random 13 | from neat.six_util import iteritems 14 | from neat.six_util import itervalues 15 | # Multi-core machine support 16 | NUM_CORES = 1 17 | # First argument is the training dataset 18 | ts_f = sys.argv[1] 19 | # Second is validation dataset 20 | vs_f = sys.argv[2] 21 | # Third argument is the url for syngularity sync 22 | #my_url = sys.argv[3] 23 | # fourth is the config filename 24 | my_config = sys.argv[3] 25 | # for cross-validation like training set 26 | index_t = 0 27 | 28 | # AgentGenome class 29 | class AgentGenome(neat.DefaultGenome): 30 | def __init__(self, key): 31 | super().__init__(key) 32 | self.discount = None 33 | 34 | def configure_new(self, config): 35 | super().configure_new(config) 36 | self.discount = 0.01 + 0.98 * random.random() 37 | 38 | def configure_crossover(self, genome1, genome2, config): 39 | super().configure_crossover(genome1, genome2, config) 40 | self.discount = random.choice((genome1.discount, genome2.discount)) 41 | 42 | def mutate(self, config): 43 | super().mutate(config) 44 | 
self.discount += random.gauss(0.0, 0.05) 45 | self.discount = max(0.01, min(0.99, self.discount)) 46 | 47 | def distance(self, other, config): 48 | dist = super().distance(other, config) 49 | disc_diff = abs(self.discount - other.discount) 50 | return dist + disc_diff 51 | 52 | def __str__(self): 53 | return "Reward discount: {0}\n{1}".format(self.discount, super().__str__()) 54 | 55 | def run(): 56 | # load the config file 57 | local_dir = os.path.dirname(__file__) 58 | config_path = os.path.join(local_dir, my_config) 59 | config = neat.Config(AgentGenome, neat.DefaultReproduction, 60 | neat.DefaultSpeciesSet, neat.DefaultStagnation, 61 | config_path) 62 | # uses the extended NEAT population PopulationSyn that synchronizes with singularity 63 | pop = neat.Population(config) 64 | # add reporters 65 | stats = neat.StatisticsReporter() 66 | pop.add_reporter(stats) 67 | pop.add_reporter(neat.StdOutReporter(True)) 68 | # save a checkpoint every 100 generations or 900 seconds. 69 | rep = neat.Checkpointer(100, 900) 70 | pop.add_reporter(rep) 71 | # class for evaluating the population 72 | ec = GenomeEvaluator(ts_f,vs_f) 73 | # initializes genomes fitness and gen_best just for the first time 74 | for g in itervalues(pop.population): 75 | g.fitness = -10000000.0 76 | ec.genomes_h.append(g) 77 | gen_best = g 78 | # initializations 79 | avg_score_v = -10000000.0 80 | avg_score_v_ant = avg_score_v 81 | avg_score = avg_score_v 82 | iteration_counter = 0 83 | best_fitness=-2000.0; 84 | pop_size=len(pop.population) 85 | # sets the nuber of continuous iterations 86 | num_iterations = round(200/len(pop.population))+1 87 | # repeat NEAT iterations until solved or keyboard interrupt 88 | while 1: 89 | try: 90 | # if it is not the first iteration calculate training and validation scores 91 | if iteration_counter >0: 92 | avg_score=ec.training_validation_score(gen_best, config) 93 | # if it is not the first iteration 94 | if iteration_counter >= 0: 95 | # synchronizes with 
singularity migrating maximum 3 specimens 96 | # pop.syn_singularity(4, my_url, stats,avg_score,rep.current_generation, config, ec.genomes_h) 97 | pop.species.speciate(config, pop.population, pop.generation) 98 | print("\nSpeciation after migration done") 99 | # perform pending evaluations on the singularity network, max 2 100 | #pop.evaluate_pending(2) 101 | #increment iteration counter 102 | iteration_counter = iteration_counter + 1 103 | # execute num_iterations consecutive iterations of the NEAT algorithm 104 | gen_best = pop.run(ec.evaluate_genomes, 2) 105 | # verify the training score is enough to stop the NEAT algorithm: TODO change to validation score when generalization is ok 106 | if avg_score < 2000000000: 107 | solved = False 108 | if solved: 109 | print("Solved.") 110 | # save the winners. 111 | for n, g in enumerate(best_genomes): 112 | name = 'winner-{0}'.format(n) 113 | with open(name + '.pickle', 'wb') as f: 114 | pickle.dump(g, f) 115 | break 116 | except KeyboardInterrupt: 117 | print("User break.") 118 | break 119 | env.close() 120 | 121 | if __name__ == '__main__': 122 | run() -------------------------------------------------------------------------------- /src/agent_multi.py: -------------------------------------------------------------------------------- 1 | # ANN evolved with NEAT as agent that uses the ForexEnv7 environment 2 | from __future__ import print_function 3 | from copy import deepcopy 4 | from gym.envs.registration import register 5 | #from population_syn import PopulationSyn # extended neat population for synchronizing with singularity p2p network 6 | from genome_evaluator import GenomeEvaluator 7 | import gym 8 | import sys 9 | import neat 10 | import os 11 | import pickle 12 | import random 13 | from neat.six_util import iteritems 14 | from neat.six_util import itervalues 15 | # Multi-core machine support 16 | NUM_CORES = 1 17 | # First argument is the training dataset 18 | ts_f = sys.argv[1] 19 | # Second is validation dataset 
20 | vs_f = sys.argv[2] 21 | # Third argument is the url for syngularity sync 22 | #my_url = sys.argv[3] 23 | # fourth is the config filename 24 | my_config = sys.argv[3] 25 | # for cross-validation like training set 26 | index_t = 0 27 | 28 | # AgentGenome class 29 | class AgentGenome(neat.DefaultGenome): 30 | def __init__(self, key): 31 | super().__init__(key) 32 | self.discount = None 33 | 34 | def configure_new(self, config): 35 | super().configure_new(config) 36 | self.discount = 0.01 + 0.98 * random.random() 37 | 38 | def configure_crossover(self, genome1, genome2, config): 39 | super().configure_crossover(genome1, genome2, config) 40 | self.discount = random.choice((genome1.discount, genome2.discount)) 41 | 42 | def mutate(self, config): 43 | super().mutate(config) 44 | self.discount += random.gauss(0.0, 0.05) 45 | self.discount = max(0.01, min(0.99, self.discount)) 46 | 47 | def distance(self, other, config): 48 | dist = super().distance(other, config) 49 | disc_diff = abs(self.discount - other.discount) 50 | return dist + disc_diff 51 | 52 | def __str__(self): 53 | return "Reward discount: {0}\n{1}".format(self.discount, super().__str__()) 54 | 55 | def run(): 56 | # load the config file 57 | local_dir = os.path.dirname(__file__) 58 | config_path = os.path.join(local_dir, my_config) 59 | config = neat.Config(AgentGenome, neat.DefaultReproduction, 60 | neat.DefaultSpeciesSet, neat.DefaultStagnation, 61 | config_path) 62 | # uses the extended NEAT population PopulationSyn that synchronizes with singularity 63 | pop = neat.Population(config) 64 | # add reporters 65 | stats = neat.StatisticsReporter() 66 | pop.add_reporter(stats) 67 | pop.add_reporter(neat.StdOutReporter(True)) 68 | # save a checkpoint every 100 generations or 900 seconds. 
69 | rep = neat.Checkpointer(100, 900) 70 | pop.add_reporter(rep) 71 | # class for evaluating the population 72 | ec = GenomeEvaluator(ts_f,vs_f) 73 | # initializes genomes fitness and gen_best just for the first time 74 | for g in itervalues(pop.population): 75 | g.fitness = -10000000.0 76 | ec.genomes_h.append(g) 77 | gen_best = g 78 | # initializations 79 | avg_score_v = -10000000.0 80 | avg_score_v_ant = avg_score_v 81 | avg_score = avg_score_v 82 | iteration_counter = 0 83 | best_fitness=-2000.0; 84 | pop_size=len(pop.population) 85 | # sets the nuber of continuous iterations 86 | num_iterations = round(200/len(pop.population))+1 87 | # repeat NEAT iterations until solved or keyboard interrupt 88 | while 1: 89 | try: 90 | # if it is not the first iteration calculate training and validation scores 91 | if iteration_counter >0: 92 | avg_score=ec.training_validation_score(gen_best, config) 93 | # if it is not the first iteration 94 | if iteration_counter >= 0: 95 | # synchronizes with singularity migrating maximum 3 specimens 96 | # pop.syn_singularity(4, my_url, stats,avg_score,rep.current_generation, config, ec.genomes_h) 97 | pop.species.speciate(config, pop.population, pop.generation) 98 | print("\nSpeciation after migration done") 99 | # perform pending evaluations on the singularity network, max 2 100 | #pop.evaluate_pending(2) 101 | #increment iteration counter 102 | iteration_counter = iteration_counter + 1 103 | # execute num_iterations consecutive iterations of the NEAT algorithm 104 | gen_best = pop.run(ec.evaluate_genomes, 2) 105 | # verify the training score is enough to stop the NEAT algorithm: TODO change to validation score when generalization is ok 106 | if avg_score < 2000000000: 107 | solved = False 108 | if solved: 109 | print("Solved.") 110 | # save the winners. 
111 | for n, g in enumerate(best_genomes): 112 | name = 'winner-{0}'.format(n) 113 | with open(name + '.pickle', 'wb') as f: 114 | pickle.dump(g, f) 115 | break 116 | except KeyboardInterrupt: 117 | print("User break.") 118 | break 119 | env.close() 120 | 121 | if __name__ == '__main__': 122 | run() -------------------------------------------------------------------------------- /src/visualize.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import copy 4 | import warnings 5 | 6 | import graphviz 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | 11 | def plot_stats(statistics, ylog=False, view=False, filename='avg_fitness.svg'): 12 | """ Plots the population's average and best fitness. """ 13 | if plt is None: 14 | warnings.warn("This display is not available due to a missing optional dependency (matplotlib)") 15 | return 16 | 17 | generation = range(len(statistics.most_fit_genomes)) 18 | best_fitness = [c.fitness for c in statistics.most_fit_genomes] 19 | avg_fitness = np.array(statistics.get_fitness_mean()) 20 | stdev_fitness = np.array(statistics.get_fitness_stdev()) 21 | 22 | plt.plot(generation, avg_fitness, 'b-', label="average") 23 | #plt.plot(generation, avg_fitness - stdev_fitness, 'g-.', label="-1 sd") 24 | plt.plot(generation, avg_fitness + stdev_fitness, 'g-.', label="+1 sd") 25 | plt.plot(generation, best_fitness, 'r-', label="best") 26 | 27 | plt.title("Population's average and best fitness") 28 | plt.xlabel("Generations") 29 | plt.ylabel("Fitness") 30 | plt.grid() 31 | plt.legend(loc="best") 32 | if ylog: 33 | plt.gca().set_yscale('symlog') 34 | 35 | plt.savefig(filename) 36 | if view: 37 | plt.show() 38 | 39 | plt.close() 40 | 41 | 42 | def plot_species(statistics, view=False, filename='speciation.svg'): 43 | """ Visualizes speciation throughout evolution. 
""" 44 | if plt is None: 45 | warnings.warn("This display is not available due to a missing optional dependency (matplotlib)") 46 | return 47 | 48 | species_sizes = statistics.get_species_sizes() 49 | num_generations = len(species_sizes) 50 | curves = np.array(species_sizes).T 51 | 52 | fig, ax = plt.subplots() 53 | ax.stackplot(range(num_generations), *curves) 54 | 55 | plt.title("Speciation") 56 | plt.ylabel("Size per Species") 57 | plt.xlabel("Generations") 58 | 59 | plt.savefig(filename) 60 | 61 | if view: 62 | plt.show() 63 | 64 | plt.close() 65 | 66 | 67 | def draw_net(config, genome, view=False, filename=None, node_names=None, show_disabled=True, prune_unused=False, 68 | node_colors=None, fmt='svg'): 69 | """ Receives a genome and draws a neural network with arbitrary topology. """ 70 | # Attributes for network nodes. 71 | if graphviz is None: 72 | warnings.warn("This display is not available due to a missing optional dependency (graphviz)") 73 | return 74 | 75 | if node_names is None: 76 | node_names = {} 77 | 78 | assert type(node_names) is dict 79 | 80 | if node_colors is None: 81 | node_colors = {} 82 | 83 | assert type(node_colors) is dict 84 | 85 | node_attrs = { 86 | 'shape': 'circle', 87 | 'fontsize': '9', 88 | 'height': '0.2', 89 | 'width': '0.2'} 90 | 91 | dot = graphviz.Digraph(format=fmt, node_attr=node_attrs) 92 | 93 | inputs = set() 94 | for k in config.genome_config.input_keys: 95 | inputs.add(k) 96 | name = node_names.get(k, str(k)) 97 | input_attrs = {'style': 'filled', 98 | 'shape': 'box'} 99 | input_attrs['fillcolor'] = node_colors.get(k, 'lightgray') 100 | dot.node(name, _attributes=input_attrs) 101 | 102 | outputs = set() 103 | for k in config.genome_config.output_keys: 104 | outputs.add(k) 105 | name = node_names.get(k, str(k)) 106 | node_attrs = {'style': 'filled'} 107 | node_attrs['fillcolor'] = node_colors.get(k, 'lightblue') 108 | 109 | dot.node(name, _attributes=node_attrs) 110 | 111 | if prune_unused: 112 | connections = set() 113 
| for cg in genome.connections.values(): 114 | if cg.enabled or show_disabled: 115 | connections.add(cg.key) 116 | 117 | used_nodes = copy.copy(outputs) 118 | pending = copy.copy(outputs) 119 | while pending: 120 | #print(pending, used_nodes) 121 | new_pending = set() 122 | for a, b in connections: 123 | if b in pending and a not in used_nodes: 124 | new_pending.add(a) 125 | used_nodes.add(a) 126 | pending = new_pending 127 | else: 128 | used_nodes = set(genome.nodes.keys()) 129 | 130 | for n in used_nodes: 131 | if n in inputs or n in outputs: 132 | continue 133 | 134 | attrs = {'style': 'filled'} 135 | attrs['fillcolor'] = node_colors.get(n, 'white') 136 | dot.node(str(n), _attributes=attrs) 137 | 138 | for cg in genome.connections.values(): 139 | if cg.enabled or show_disabled: 140 | #if cg.input not in used_nodes or cg.output not in used_nodes: 141 | # continue 142 | input, output = cg.key 143 | a = node_names.get(input, str(input)) 144 | b = node_names.get(output, str(output)) 145 | style = 'solid' if cg.enabled else 'dotted' 146 | color = 'green' if cg.weight > 0 else 'red' 147 | width = str(0.1 + abs(cg.weight / 5.0)) 148 | dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width}) 149 | 150 | dot.render(filename, view=view) 151 | 152 | return dot -------------------------------------------------------------------------------- /src/agent_NEAT_p2p.py: -------------------------------------------------------------------------------- 1 | # ANN evolved with NEAT as agent that uses the ForexEnv environment 2 | from __future__ import print_function 3 | from copy import deepcopy 4 | from gym.envs.registration import register 5 | #******************************************************************* 6 | # extended neat population for synchronizing with singularity p2p network 7 | from population_syn import PopulationSyn 8 | # ****************************************************************** 9 | from genome_evaluator import GenomeEvaluator 10 | 
import gym
import sys
import neat
import os
import pickle
import random
from neat.six_util import iteritems
from neat.six_util import itervalues

# Multi-core machine support (evaluation currently runs single-core).
NUM_CORES = 1
# Command-line arguments.
ts_f = sys.argv[1]        # training dataset
vs_f = sys.argv[2]        # validation dataset
my_config = sys.argv[3]   # NEAT config filename
my_url = sys.argv[4]      # URL of the singularity sync API
# index of the current training subset (cross-validation-like training)
index_t = 0


class AgentGenome(neat.DefaultGenome):
    """DefaultGenome extended with an evolvable reward-discount attribute.

    The discount is initialized randomly in (0.01, 0.99), inherited from a
    random parent on crossover, perturbed with Gaussian noise on mutation,
    and contributes its absolute difference to the genome distance metric.
    """

    def __init__(self, key):
        super().__init__(key)
        # Set by configure_new()/configure_crossover().
        self.discount = None

    def configure_new(self, config):
        super().configure_new(config)
        self.discount = 0.01 + 0.98 * random.random()

    def configure_crossover(self, genome1, genome2, config):
        super().configure_crossover(genome1, genome2, config)
        self.discount = random.choice((genome1.discount, genome2.discount))

    def mutate(self, config):
        super().mutate(config)
        self.discount += random.gauss(0.0, 0.05)
        self.discount = max(0.01, min(0.99, self.discount))

    def distance(self, other, config):
        dist = super().distance(other, config)
        disc_diff = abs(self.discount - other.discount)
        return dist + disc_diff

    def __str__(self):
        return "Reward discount: {0}\n{1}".format(self.discount, super().__str__())


def run():
    """Evolve AgentGenome networks with NEAT, syncing with singularity.

    Loads the NEAT config named on the command line, then loops: score the
    current best genome, migrate up to 4 specimens to/from the singularity
    p2p network, re-speciate, and run 2 more NEAT generations.  Stops when
    the training score threshold is reached or on keyboard interrupt.
    """
    # load the config file
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, my_config)
    config = neat.Config(AgentGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)
    # uses the extended NEAT population that synchronizes with singularity
    pop = PopulationSyn(config)
    # add reporters
    stats = neat.StatisticsReporter()
    pop.add_reporter(stats)
    pop.add_reporter(neat.StdOutReporter(True))
    # save a checkpoint every 100 generations or 900 seconds
    rep = neat.Checkpointer(100, 900)
    pop.add_reporter(rep)
    # class for evaluating the population
    ec = GenomeEvaluator(ts_f, vs_f)
    # initialize genome fitness and gen_best for the first iteration
    gen_best = None
    for g in itervalues(pop.population):
        g.fitness = -10000000.0
        ec.genomes_h.append(g)
        gen_best = g
    avg_score = -10000000.0
    iteration_counter = 0
    # repeat NEAT iterations until solved or keyboard interrupt
    while 1:
        try:
            # after the first iteration, compute training/validation scores
            if iteration_counter > 0:
                avg_score = ec.training_validation_score(gen_best, config)
            # synchronize with singularity, migrating at most 4 specimens
            pop.syn_singularity(4, my_url, stats, avg_score,
                                rep.current_generation, config, ec.genomes_h)
            pop.species.speciate(config, pop.population, pop.generation)
            print("\nSpeciation after migration done")
            iteration_counter += 1
            # execute 2 consecutive generations of the NEAT algorithm
            gen_best = pop.run(ec.evaluate_genomes, 2)
            # stop when the training score is high enough
            # (TODO: switch to validation score once generalization is ok)
            # BUGFIX: recompute 'solved' every loop; the old code only ever
            # assigned False, so a later high score could never trigger a stop.
            solved = avg_score >= 2000000000
            if solved:
                print("Solved.")
                # save the winners
                # BUGFIX: 'best_genomes' was undefined here; use the stats
                # reporter's best unique genomes instead.
                for n, g in enumerate(stats.best_unique_genomes(3)):
                    name = 'winner-{0}'.format(n)
                    with open(name + '.pickle', 'wb') as f:
                        pickle.dump(g, f)
                break
        except KeyboardInterrupt:
            print("User break.")
            break
    # BUGFIX: the old 'env.close()' raised NameError ('env' never existed);
    # close the evaluator's gym environments instead.
    ec.env_t.close()
    ec.env_v.close()


if __name__ == '__main__':
    run()
# converts a bidimensional observation matrix to a one-dimensional list
def nn_format(self, obs):
    """Flatten a 2-D observation into a flat list for direct ANN feed."""
    return [val for arr in obs for val in arr]


def simulate(self, nets):
    """Run each (genome, net) pair through one full training-set episode.

    Args:
        nets: list of (genome, FeedForwardNetwork) pairs.

    Returns:
        A list with one average episode score per genome, in nets order.
    """
    scores = []
    self.test_episodes = []
    for genome, net in nets:
        sub_scores = []
        observation = self.env_t.reset()
        score = 0.0
        while 1:
            output = net.activate(self.nn_format(observation))
            action = np.argmax(output)  # buy, sell or nop
            observation, reward, done, info = self.env_t.step(action)
            score += reward
            if done:
                break
        sub_scores.append(score)
        # fitness per genome: average over the episode scores
        scores.append(sum(sub_scores) / len(sub_scores))
    # Guard against an empty population (min()/max() would raise).
    if scores:
        print("Score range [{:.3f}, {:.3f}]".format(min(scores), max(scores)))
    return scores


def evaluate_genomes(self, genomes, config):
    """Assign fitness to every genome by simulating it on the training env."""
    self.generation += 1
    nets = [(g, neat.nn.FeedForwardNetwork.create(g, config)) for gid, g in genomes]
    scores = self.simulate(nets)
    print("Evaluating {0} test episodes".format(len(self.test_episodes)))
    # Record the evaluated genomes so the p2p sync can inspect/replace them.
    self.genomes_h = []
    for (genome, net), score in zip(nets, scores):
        genome.fitness = score
        self.genomes_h.append(genome)


def training_validation_score(self, gen_best, config):
    """Evaluate the best genome on the training and validation environments.

    Prints both scores; returns the TRAINING average score (the caller's
    TODO plans to switch to the validation score once generalization is ok).
    """
    best_scores = []
    observation = self.env_t.reset()
    score = 0.0
    step = 0
    # The same network is reused for both episodes (the old code rebuilt it).
    gen_best_nn = neat.nn.FeedForwardNetwork.create(gen_best, config)
    # training-set episode
    while 1:
        step += 1
        output = gen_best_nn.activate(self.nn_format(observation))
        action = np.argmax(output)  # buy, sell or nop
        observation, reward, done, info = self.env_t.step(action)
        score += reward
        self.env_t.render()
        if done:
            break
    self.episode_score.append(score)
    self.episode_length.append(step)
    best_scores.append(score)
    avg_score = sum(best_scores) / len(best_scores)
    print("Training Set Score=", score, " avg_score=", avg_score, " num_closes= ", info["num_closes"],
          " balance=", info["balance"])
    # validation-set episode
    best_scores = []
    observation = self.env_v.reset()
    score = 0.0
    step = 0
    while 1:
        step += 1
        output = gen_best_nn.activate(self.nn_format(observation))
        action = np.argmax(output)  # buy, sell or nop
        observation, reward, done, info = self.env_v.step(action)
        score += reward
        if done:
            break
    best_scores.append(score)
    avg_score_v = sum(best_scores) / len(best_scores)
    print("Validation Set Score = ", avg_score_v)
    print("*********************************************************")
    return avg_score
# replaces, in the genomes list, the genome whose key is less_fit_key with remote
def replaceGenomes(self, genomes, less_fit_key, remote):
    """Return a copy of genomes with the less_fit_key entry swapped for remote."""
    genomes_h = []
    for g in genomes:
        if g.key == less_fit_key:
            g = remote
        genomes_h.append(g)
    return genomes_h


def calculateFitness(self, best_genomes):
    """Weighted elite fitness: blend of the best fitness and the elite mean.

    Returns -100000 for an empty list.
    """
    countr = 0
    accum = 0
    best = None
    max_fitness = -1000
    # search for the maximum fitness and accumulate for the mean
    for n, g in enumerate(best_genomes):
        accum = accum + g.fitness
        countr = countr + 1
        if g.fitness > max_fitness:
            max_fitness = g.fitness
            best = g
    if countr > 0:
        # BUGFIX: weight the *maximum* fitness, not whichever genome the
        # loop variable happened to end on (the old code used g.fitness
        # after the loop, i.e. the last genome's fitness).
        best_fitness = ((len(best_genomes) - 1) * max_fitness + (accum / countr)) / len(best_genomes)
    else:
        best_fitness = -100000
    return best_fitness


def searchLessFit(self, genomes_h):
    """Return the genome with the lowest fitness (None for an empty list)."""
    less_fit = None
    min_fitness = None
    # BUGFIX: the old sentinel (10000) returned None whenever every genome
    # had fitness >= 10000, crashing the caller; track the minimum directly.
    for g in genomes_h:
        if min_fitness is None or g.fitness < min_fitness:
            min_fitness = g.fitness
            less_fit = g
    return less_fit
return less_fit 56 | 57 | # synSingularity method for synchronizing NEAT optimization states with singularity 58 | # args: num_replacements = number of specimens to be migrated to/from singularity 59 | # my_url = url of the singularity API 60 | # stats = neat.StatisticsReporter 61 | # returns: best_genoms selected between the remote and local 62 | def syn_singularity(self, num_replacements, my_url, stats, avg_score, current_generation, config, genomes_h): 63 | # downloads process from singualrity to find last optimum 64 | print('num_rep=', num_replacements,'my_url=', my_url,'stats=', stats, 65 | 'avg_score=', avg_score, 'current_generation=', current_generation) 66 | res = requests.get(my_url + "/processes/1?username=harveybc&pass_hash=$2a$04$ntNHmofQoMoajG89mTEM2uSR66jKXBgRQJnCgqfNN38aq9UkN4Y6q&process_hash=ph") 67 | cont = res.json() 68 | last_optimum_id = cont['result'][0]['last_optimum_id'] 69 | # calcualte local_perf as the weitgthed average of the best performers 70 | best_genomes = stats.best_unique_genomes(num_replacements) 71 | local_perf = self.calculateFitness(best_genomes) 72 | # remote performance from results of request 73 | remote_perf = cont['result'][0]['current_block_performance'] 74 | # print results of request 75 | print('\nremote_performance =', cont['result'][0]['current_block_performance'], '\nlocal_performance =', local_perf, '\nlast_optimum_id =', cont['result'][0]['last_optimum_id']) 76 | # if remote_performance is not equal to local_performance, download remote_reps 77 | parameter_downloaded = 0 78 | if (local_perf != remote_perf): 79 | # hace request GetParameter(id) 80 | res_p = requests.get(my_url + "/parameters/" + str(last_optimum_id) + "?username=harveybc&pass_hash=$2a$04$ntNHmofQoMoajG89mTEM2uSR66jKXBgRQJnCgqfNN38aq9UkN4Y6q&process_hash=ph") 81 | cont_param = res_p.json() 82 | print('\ncont_param =', cont_param) 83 | # descarga el checkpoint del link de la respuesta si cont.parameter_link 84 | if 
cont_param['result'][0]['parameter_link'] is not None: 85 | genom_data = requests.get(cont_param['result'][0]['parameter_link']).content 86 | with open('remote_reps', 'wb') as handler: 87 | handler.write(genom_data) 88 | handler.close() 89 | # carga genom descargado en nueva poblacion pop2 90 | with open('remote_reps', 'rb') as f: 91 | remote_reps = pickle.load(f) 92 | print('\nPARAMETERS DOWNLOADED: remote_reps=', remote_reps) 93 | parameter_downloaded = 1 94 | 95 | # if local_perf < remote_perf 96 | if (local_perf < remote_perf) and (parameter_downloaded): 97 | # for each remote_reps as remote 98 | print('\nremote_fitness = ', remote_perf, 'local_fitness = ', local_perf) 99 | #print('\ngenomes_h = ',genomes_h) 100 | genomes = genomes_h 101 | for remote in remote_reps: 102 | # search the less_fit in pop 103 | less_fit = self.searchLessFit(genomes) 104 | # replaces less_fit with remote 105 | #print("less_fit = ", less_fit) 106 | #less_fit_key = less_fit.key 107 | less_fit_key = remote.key 108 | print('\nREPLACED = ', less_fit_key, 'fitness=', less_fit.fitness, 109 | 'new_fitness', remote.fitness) 110 | #replaces lessfit in population by remote with the same key as less fit 111 | remote.key=less_fit_key 112 | self.population[less_fit_key] = remote 113 | genomes=self.replaceGenomes(genomes, less_fit_key, remote) 114 | 115 | # if local_perf > remote_perf 116 | if (local_perf >remote_perf): 117 | # upload best_genomes 118 | print('***********************************************************') 119 | print("\nNEW OPTIMUM") 120 | for g in best_genomes: 121 | print("\nbest_genomes[i] = ",g.key," fitness = ",g.fitness) 122 | filename = '{0}{1}'.format("reps-", current_generation) 123 | with open(filename, 'wb') as f: 124 | pickle.dump(best_genomes, f) 125 | # Hace request de CreateParam a syn 126 | form_data = {"process_hash": "ph", "app_hash": "ah", 127 | "parameter_link": my_url + "/genoms/" + filename, 128 | "parameter_text": 0, "parameter_blob": "", "validation_hash": 
"", 129 | "hash": "h", "performance": local_perf, "redir": "1", "username": "harveybc", 130 | "pass_hash": "$2a$04$ntNHmofQoMoajG89mTEM2uSR66jKXBgRQJnCgqfNN38aq9UkN4Y6q"} 131 | res = requests.post( 132 | my_url + "/parameters?username=harveybc&pass_hash=$2a$04$ntNHmofQoMoajG89mTEM2uSR66jKXBgRQJnCgqfNN38aq9UkN4Y6q&process_hash=ph", 133 | data=form_data) 134 | res_json = res.json() 135 | return 0 136 | 137 | def evaluate_pending(self,max_pending): 138 | # TODO: 139 | # VERIFY IF THERE ARE PENDING EVALUATIONS IN SINGULARITY 140 | # EVALUATE NUM_EVALUATIONS PENDING EVALUATIONS 141 | return 0 142 | -------------------------------------------------------------------------------- /src/evolve.py: -------------------------------------------------------------------------------- 1 | # Evolve a control/reward estimation network for the OpenAI Gym 2 | # LunarLander-v2 environment (https://gym.openai.com/envs/LunarLander-v2). 3 | # Sample run here: https://gym.openai.com/evaluations/eval_FbKq5MxAS9GlvB7W6ioJkg 4 | 5 | from __future__ import print_function 6 | 7 | import gym 8 | import gym.wrappers 9 | 10 | import matplotlib.pyplot as plt 11 | 12 | import multiprocessing 13 | import neat 14 | import numpy as np 15 | import os 16 | import pickle 17 | import random 18 | import time 19 | 20 | import visualize 21 | 22 | NUM_CORES = 8 23 | 24 | env = gym.make('LunarLander-v2') 25 | 26 | print("action space: {0!r}".format(env.action_space)) 27 | print("observation space: {0!r}".format(env.observation_space)) 28 | 29 | env = gym.wrappers.Monitor(env, 'results', force=True) 30 | 31 | 32 | class LanderGenome(neat.DefaultGenome): 33 | def __init__(self, key): 34 | super().__init__(key) 35 | self.discount = None 36 | 37 | def configure_new(self, config): 38 | super().configure_new(config) 39 | self.discount = 0.01 + 0.98 * random.random() 40 | 41 | def configure_crossover(self, genome1, genome2, config): 42 | super().configure_crossover(genome1, genome2, config) 43 | self.discount = 
random.choice((genome1.discount, genome2.discount)) 44 | 45 | def mutate(self, config): 46 | super().mutate(config) 47 | self.discount += random.gauss(0.0, 0.05) 48 | self.discount = max(0.01, min(0.99, self.discount)) 49 | 50 | def distance(self, other, config): 51 | dist = super().distance(other, config) 52 | disc_diff = abs(self.discount - other.discount) 53 | return dist + disc_diff 54 | 55 | def __str__(self): 56 | return "Reward discount: {0}\n{1}".format(self.discount, 57 | super().__str__()) 58 | 59 | # convert an ann to a dcn model 60 | def ann2dcn(self, nets_ann, num_vectors, vector_size): 61 | # esta funci?n debe retornar un arreglo de modelos 62 | # Deep Conv Neural Net for Deep-Q learning Model 63 | models = [] 64 | # for each net generate a dnn model 65 | for net in nets_ann: 66 | # creates a new model 67 | model = Sequential() 68 | # node counter 69 | c_node = 0 70 | # initialize values from input node 71 | node, act_func, agg_func, bias, response, links = net.node_evals[net.input_nodes[0]] 72 | # repeat until next_node != output (add layers) 73 | while true: 74 | # add the layer depending on the conection to the next node: 75 | # act_funct of the next node = core layer 76 | # bias of the next neuron (0.1,1) = kernel size 77 | kernel_size = round(bias * max_kernel_size) 78 | # response of the next node(0.1,1)= pool_size, stride 79 | pool_size = round(response * max_pool_size) 80 | # link.w to the next node(0,1) = number of filters 81 | filters = links[0].w 82 | ########################################################## 83 | # Encoding: 84 | # agg_funct = min -> adds dropout, else adds conv1D 85 | # agg_funct = sum -> add pooling layer 86 | # agg_funct = product -> does nothing(add it as option in NEAT config) 87 | # act:funct = relu -> adds relu layer 88 | # act_funct = sigmoid -> adds hard-sigmoid layer 89 | ########################################################## 90 | # si agg_funct = min: adiciona capa dropout # TODO: add min aggregation_options 
in config_20_dqn 91 | if agg_funct == 'min': 92 | # if its the first node: 93 | if c_node==0: 94 | model.add(Dropout(0.1, input_shape=(num_vectors, vector_size))) 95 | else: 96 | model.add(Dropout(0.1)) 97 | # sino es dropout adiciona una capa Conv1D (note in config that sum and product are set so a conv1d layer has double the chances than dropout (min agg_funct)) 98 | else: 99 | if c_node==0: 100 | model.add(Conv1D(filters, kernel_size, input_shape=(num_vectors, vector_size))) 101 | else: 102 | model.add(Conv1D(filters, kernel_size)) 103 | # act_funct = sigmoid: relu 104 | if act_funct == 'relu': 105 | model.add(Activation('relu')) 106 | # act_funct = tanh: hard_sigmoid 107 | if act_funct == 'sigmoid': 108 | model.add(Activation('hard_sigmoid')) 109 | # agg_funct = sum:pooling, product:no-pooling 110 | if agg_funct == 'sum': 111 | model.add(MaxPooling1D(pool_size=pool_size, strides=pool_size)) 112 | # TODO: DROPOUT SOLO SE DEBE USAR EN TRAINING, NO EN EVAL 113 | # stop condition for while: until next node = output 114 | if links[0].i==net.output_nodes[0]: 115 | break 116 | # read values from next node 117 | node, act_func, agg_func, bias, response, links = net.node_evals[links[0].i] 118 | # increment node counter 119 | c_node += 1 120 | # adds a dense layer with the parameters of the output node with response attribute 121 | model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors 122 | model.add(Dense(response*max_dense)) # valor ?ptimo:64 @400k 123 | model.add(Activation('relu')) 124 | # TODO: Probar con Hard sigmoid(pq controls requieren -1,1) y relu(0,1)-> mod. 
def compute_fitness(genome, net, episodes, min_reward, max_reward):
    """Per-sample squared errors between net value estimates and discounted rewards.

    Args:
        genome: genome carrying the evolved reward-discount factor.
        net: network whose activate(observation) yields per-action estimates.
        episodes: list of (score, data) pairs; data rows are 8 observation
            values, then the action index, then the reward.
        min_reward, max_reward: normalization bounds for discounted rewards.

    Returns:
        List of squared errors, one per data row across all episodes.
    """
    # Horizon where the discount decays to ~1% - defines the kernel length.
    horizon = int(round(np.log(0.01) / np.log(genome.discount)))
    kernel = [genome.discount ** (horizon - i) for i in range(horizon + 1)]

    errors = []
    for _score, data in episodes:
        # Discounted reward per step, normalized into [-1, 1] and clipped.
        discounted = np.convolve(data[:, -1], kernel)[horizon:]
        discounted = 2 * (discounted - min_reward) / (max_reward - min_reward) - 1.0
        discounted = np.clip(discounted, -1.0, 1.0)

        for row, target in zip(data, discounted):
            estimate = net.activate(row[:8])[int(row[8])]
            errors.append(float((estimate - target) ** 2))

    return errors
def run():
    """Evolve LanderGenome reward-estimation networks for LunarLander-v2.

    Loads the NEAT config from the 'config' file next to this script, then
    repeats 5-generation NEAT runs until an ensemble of the best genomes
    solves the environment (average score >= 200 over 100 episodes) or the
    user interrupts with Ctrl-C.  Saves plots, checkpoints and, on success,
    pickled winner genomes plus network diagrams.
    """
    # Load the config file, which is assumed to live in
    # the same directory as this script.
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config')
    config = neat.Config(LanderGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)

    pop = neat.Population(config)
    stats = neat.StatisticsReporter()
    pop.add_reporter(stats)
    pop.add_reporter(neat.StdOutReporter(True))
    # Checkpoint every 25 generations or 900 seconds.
    pop.add_reporter(neat.Checkpointer(25, 900))

    # Run until the winner from a generation is able to solve the environment
    # or the user interrupts the process.
    ec = PooledErrorCompute()
    while 1:
        try:
            gen_best = pop.run(ec.evaluate_genomes, 5)

            #print(gen_best)

            # Persist fitness/speciation plots and episode statistics.
            visualize.plot_stats(stats, ylog=False, view=False, filename="fitness.svg")

            plt.plot(ec.episode_score, 'g-', label='score')
            plt.plot(ec.episode_length, 'b-', label='length')
            plt.grid()
            plt.legend(loc='best')
            plt.savefig("scores.svg")
            plt.close()

            mfs = sum(stats.get_fitness_mean()[-5:]) / 5.0
            print("Average mean fitness over last 5 generations: {0}".format(mfs))

            mfs = sum(stats.get_fitness_stat(min)[-5:]) / 5.0
            print("Average min fitness over last 5 generations: {0}".format(mfs))

            # Use the best genomes seen so far as an ensemble-ish control system.
            best_genomes = stats.best_unique_genomes(3)
            best_networks = []
            for g in best_genomes:
                best_networks.append(neat.nn.FeedForwardNetwork.create(g, config))

            solved = True
            best_scores = []
            for k in range(100):
                observation = env.reset()
                score = 0
                step = 0
                while 1:
                    step += 1
                    # Use the total reward estimates from all of the best
                    # networks to determine the best action given the current
                    # state (majority vote over each network's argmax).
                    # NOTE(review): the vote vector is hard-coded to the 4
                    # discrete actions of LunarLander-v2.
                    votes = np.zeros((4,))
                    for n in best_networks:
                        output = n.activate(observation)
                        votes[np.argmax(output)] += 1

                    best_action = np.argmax(votes)
                    observation, reward, done, info = env.step(best_action)
                    score += reward
                    env.render()
                    if done:
                        break

                ec.episode_score.append(score)
                ec.episode_length.append(step)

                best_scores.append(score)
                avg_score = sum(best_scores) / len(best_scores)
                print(k, score, avg_score)
                # Bail out early as soon as the running average drops below
                # the 200-point solve threshold.
                if avg_score < 200:
                    solved = False
                    break

            if solved:
                print("Solved.")

                # Save the winners.
                for n, g in enumerate(best_genomes):
                    name = 'winner-{0}'.format(n)
                    with open(name+'.pickle', 'wb') as f:
                        pickle.dump(g, f)

                    visualize.draw_net(config, g, view=False, filename=name+"-net.gv")
                    visualize.draw_net(config, g, view=False, filename=name+"-net-enabled.gv",
                                       show_disabled=False)
                    visualize.draw_net(config, g, view=False, filename=name+"-net-enabled-pruned.gv",
                                       show_disabled=False, prune_unused=True)

            break
        except KeyboardInterrupt:
            print("User break.")
            break

    env.close()


if __name__ == '__main__':
    run()
set_random_seed(2) 11 | 12 | import gym 13 | import gym.wrappers 14 | import gym_forex 15 | from gym.envs.registration import register 16 | import sys 17 | import neat 18 | import os 19 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 20 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 21 | from joblib import load 22 | from sklearn import svm 23 | import numpy as np 24 | import matplotlib.pyplot as plt 25 | from sklearn.metrics import mean_squared_error 26 | import operator 27 | from numpy import genfromtxt 28 | import csv 29 | from sklearn import svm 30 | from operator import add, sub 31 | from joblib import dump, load 32 | from sklearn import preprocessing 33 | from keras.models import Sequential, load_model 34 | from keras.layers import Conv2D,Conv1D, MaxPooling2D, MaxPooling1D 35 | from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization, TimeDistributed 36 | from keras.layers import LSTM 37 | from keras.optimizers import SGD, Adamax 38 | import copy 39 | 40 | import random 41 | 42 | ## \class QAgent 43 | ## \brief Q-Learning agent that uses an OpenAI gym environment for fx trading 44 | ## estimating for each tick, the optimal SL, TP, and Volume. 45 | class QAgent(): 46 | ## init method 47 | ## Loads the validation dataset, loads the pre-trained models 48 | # initialize forex environment. 
    ## init method
    ## Reads the command-line arguments, loads the pre-processing settings
    ## and feature-selection mask, registers and creates the gym-forex
    ## validation environment, and parses normalization bounds per feature
    ## from the validation dataset's header row.
    def __init__(self):
        # percentage of noise to add to an action
        # TODO: change actions so orders are only closed by SL or TP (dep. on volatility)
        self.noise = 0.0
        # TODO: test orders with a minimum duration in ticks (only closeable by
        # TP/SL, or by action once min_duration has elapsed)
        # noise 0,    min_duration = 0  -> bal = 241k
        # noise 0,    min_duration = 20 -> bal = 43k
        # noise 0.25, min_duration = 20 -> bal = 1k
        self.duration = 5
        self.min_duration = 0
        # thresholds for opening/closing orders
        self.th_open = 0.2
        self.th_close = 0.1

        # TODO: test orders that are only closed by SL/TP
        # TODO: grid-search SL/TP
        # TODO: in the ideal no-noise case, test whether profit grows with
        #       volatility-controlled volume
        # TODO: test whether volatility-controlled SL/TP beats the best fixed
        #       values found by grid search
        # First argument is the validation dataset, including headers indicating
        # maximum and minimum per feature
        self.vs_f = sys.argv[1]
        # Second argument is the prefix (including path) for the dcn pre-trained
        # models for the actions; all models are files whose name is the prefix
        # concatenated with a number indicating the action:
        # 0 = Buy/CloseSell/nopCloseBuy
        # 1 = Sell/CloseBuy/nopCloseSell
        # 2 = No Open Buy
        # 3 = No Open Sell
        self.model_prefix = sys.argv[2]
        # Third argument is the path of the dataset to be used in the gym
        # environment (not q-datagen generated, without headers)
        self.env_f = sys.argv[3]
        # initialize gym-forex env (version 4) bookkeeping
        self.test_episodes = []
        self.generation = 0
        self.min_reward = -15
        self.max_reward = 15
        self.episode_score = []
        self.episode_length = []
        # placeholder per-signal models (replaced by pre-trained ones on load)
        self.svr_rbf = svm.SVR(kernel='rbf')
        self.num_s = 19
        self.model = [self.svr_rbf] * self.num_s

        self.max_index = 0
        self.vs_data = []
        self.vs_num_ticks = 0
        self.vs_num_columns = 0
        # observation window size in ticks
        self.obsticks = 30
        self.window_size = self.obsticks
        # TODO: obtain action min/max from the q-datagen dataset headers
        self.min_TP = 50
        self.max_TP = 1000
        self.min_SL = 50
        self.max_SL = 1000
        self.min_volume = 0.0
        self.max_volume = 0.1
        self.security_margin = 0.1
        self.test_action = 0
        self.num_f = 0
        self.num_features = 0
        self.action_prev = [0]
        self.action = [0]
        self.raw_action = [0]
        # load pre-processing settings
        self.pt = preprocessing.PowerTransformer()
        print("loading pre-processing.PowerTransformer() settings for the generated dataset")
        self.pt = load(self.vs_f+'.powertransformer')
        # load feature-selection mask
        print("loading pre-processing feature selection mask")
        self.mask = load(self.vs_f+'.feature_selection_mask')
        # variables for output csv files for observations and prediction
        self.out_obs = []
        self.out_act = []
        # register the gym-forex openai gym environment
        # TODO: extract obs_ticks as the window_size from the q-datagen output headers
        register(
            id='ForexValidationSet-v1',
            entry_point='gym_forex.envs:ForexEnv6',
            kwargs={'dataset': self.env_f ,'max_volume':self.max_volume, 'max_sl':self.max_SL,
            'max_tp':self.max_TP, 'min_sl':self.min_SL,
            'min_tp':self.min_TP,'obsticks':self.obsticks,
            'capital':800, 'leverage':100, 'num_features': 13}
        )
        # make openai gym environments
        self.env_v = gym.make('ForexValidationSet-v1')
        # Shows the action and observation space from the forex_env; its
        # observation space is bidimensional, so it has to be converted to an
        # array with nn_format() for direct ANN feed.
        print("action space: {0!r}".format(self.env_v.action_space))
        print("observation space: {0!r}".format(self.env_v.observation_space))
        # read normalization maximum and minimum per feature from the header
        with open(self.vs_f, newline='') as f:
            reader = csv.reader(f)
            n_data = next(reader) # gets the first line
        self.num_columns = len(n_data)
        print("vs_f num_columns = ", self.num_columns)
        # minimum and maximum per feature for normalization before evaluation
        # in the pre-trained models
        self.max = [None] * self.num_columns
        self.min = [None] * self.num_columns
        for i in range(0, self.num_columns-self.num_s):
            header_cell = n_data[i]
            # header cell format: <name>_<min>_<max>; the last two underscore-
            # separated fields are the normalization bounds
            data = header_cell.split("_")
            num_parts = len(data)
            self.max[i] = float(data[num_parts-1])
            self.min[i] = float(data[num_parts-2])
        # data was normalized as:
        #   my_data_n[0, i] = (2.0 * (my_data[0, i] - min[i]) / (max[i] - min[i])) - 1
j ]) 176 | # obs_frame contains window_size rows with num_features columns with the newest observation in cell[0] 177 | obs_frame.append(copy.deepcopy(obs_row)) 178 | # obs_matrix contains files with observations of size (window_Size, num_features) 179 | obs_matrix.append(copy.deepcopy(obs_frame)) 180 | c_row = c_row + 1 181 | #print("Formating of data for DCN input performed succesfully.") 182 | return np.array(obs_matrix) 183 | 184 | ## the action model is the same q-datagen generated dataset 185 | def load_action_models(self, signal): 186 | self.svr_rbf = load_model(self.model_prefix + str(signal)+'.dcn') 187 | # get the number of observations 188 | self.vs_data = genfromtxt(self.vs_f, delimiter=',') 189 | self.vs_num_ticks = len(self.vs_data) 190 | self.vs_num_columns = len(self.vs_data[0]) 191 | self.num_f = self.vs_num_columns - self.num_s 192 | self.num_features = self.num_f // self.window_size 193 | self.num_ticks = self.vs_num_ticks 194 | 195 | ## For an observation for each tick, returns 0 if the slope of the future(10) MACD signal (output 16 zero-based) is negative, 1 if its positive. 196 | def decide_next_action(self, normalized_observation): 197 | # TODO: evaluar el modelo de regresion y retornar como action un arreglo con el valor predicho por cada modelo. 
(0= clasif,1=regresion) 198 | # evaluate all models with the observation data window 199 | self.action_prev = copy.deepcopy(self.action) 200 | self.action = [] 201 | self.max_index = 0 202 | action_list = [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0] 203 | vs = np.array(normalized_observation) 204 | # evaluate all models with the observation data window 205 | self.action = [] 206 | vs = np.array(normalized_observation) 207 | vs_r = np.reshape(vs, (1, -1)) 208 | #print ("vs_r = ",vs_r) 209 | obs = self.dcn_input(vs_r) 210 | np.set_printoptions(threshold=sys.maxsize) 211 | obs = np.swapaxes(obs, 1, 2) 212 | #print("obs = ", obs) 213 | #print("obs.shape = ", obs.shape) 214 | action_list[0] = self.svr_rbf.predict(obs) 215 | print("action_list[0] = ", action_list[0]) 216 | # TODO: Add observation to output csv file array, Quitar cuando pretreiner y agent_dcn tengan las mismas salidas y entradas 217 | # TODO: Add Normalized obervation to test if the cdn_input function is working well 218 | self.out_obs.append(copy.deepcopy(obs)) 219 | #self.out_obs.append(obs) 220 | # TODO: Add action to output csv file array, Quitar cuando pretreiner y agent_dcn tengan las mismas salidas y entradas 221 | self.out_act.append(copy.deepcopy(action_list[0][0])) 222 | # seto the returned action to actionlist 223 | self.action = copy.deepcopy(action_list) 224 | #print("action=",self.action) 225 | 226 | return self.action 227 | 228 | ## normalize the observation matrix, converts it to a list feedable to a pretrained DcN 229 | # oldest data is first in dataset and also in observation matrix 230 | # input obs_matrix, prev obs_matrix, output:row 231 | def normalize_observation(self, observation, observation_prev): 232 | # observation is a list with size num_features of numpy.deque of size 30 (time window) 233 | # TODO: PORQUE num_columns_o es 29? 
234 | n_obs = [] 235 | l_diff = [] 236 | #print("observation = ", observation) 237 | num_columns_o = len(observation) 238 | # TODO: Cambiar a recorrido de l_obs restando el anterior y solo usar l_obs_prev para el primer elemento 239 | for i in range (0, num_columns_o): 240 | l_obs = list(observation[i]) 241 | l_obs_prev = list(observation_prev[i]) 242 | for j in range (0, self.window_size): 243 | diff = l_obs[j] - l_obs_prev[j] 244 | l_diff.append(diff) 245 | for l in l_obs: 246 | n_obs.append(l) 247 | 248 | for l in l_diff: 249 | n_obs.append(l) 250 | n_obs_n = np.array(n_obs).reshape(1,-1) 251 | n_obs_o = self.pt.transform(n_obs_n) 252 | n_o = n_obs_o[0].tolist() 253 | n_obs=np.array(n_o) 254 | n_obs = n_obs[self.mask] 255 | return n_obs 256 | 257 | ## Function transform_action: convert the output of the raw_action into the 258 | ## denormalized values to be used in the simulation environment. 259 | ## increase the SL in the sec_margin% and decrease the TP in the same %margin, volume is also reduced in the %margin 260 | def transform_action(self, order_status): 261 | # order_status: 0 nop, -1=sell,1=buy 262 | # the variable self.raw_action contains the output of decide_next_action, which is an array of 3 values, MACD signal return, RSI return and MACD main - signal >0? 263 | # the output actions are: 0=TP,1=SL,2=volume(dInv). 
264 | # if there is no opened order 265 | act = [] 266 | # initialize values for next order , dir: 1=buy, -1=sell, 0=nop 267 | dire = 0.0 268 | tp = 1.0 269 | tp_a=tp 270 | sl = 1.0 271 | vol = 1.0 272 | 273 | action_diff = self.raw_action[self.test_action] - self.action_prev[self.test_action] 274 | # TODO: if there is an opened order, increases de duration counter, else set it to 0 275 | if (order_status==0): 276 | self.duration = 0 277 | else: 278 | self.duration = self.duration + 1 279 | # TODO: add min_duration constraint to evaluate if closing an open order with an action 280 | # if there is no opened order 281 | if order_status == 0: 282 | # si el action[0] > 0, compra, sino vende 283 | if (self.raw_action[0] > 0.3): 284 | # opens buy order 285 | dire = 1.0 286 | tp_a = 0.1 287 | if (self.raw_action[0] < -0.3): 288 | # opens sell order 289 | dire = -1.0 290 | tp_a = 0.1 291 | # if there is an existing buy order 292 | if (order_status == 1) and (self.duration > self.min_duration): 293 | # si action[0] == 0 cierra orden de buy 294 | if (self.raw_action[0] < 0): 295 | # closes buy order 296 | dire = -1.0 297 | # if there is an existing sell order 298 | if (order_status == -1) and (self.duration > self.min_duration): 299 | # if action[0]>0, closes the sell order 300 | if (self.raw_action[0] > 0): 301 | # closes sell order 302 | dire = 1.0 303 | # verify limits of sl and tp, TODO: quitar cuando estén desde fórmula 304 | sl_a = 1.0 305 | 306 | # Create the action list output [tp, sl, vol, dir] 307 | act.append(tp_a) 308 | # TODO: en el simulador, implmeentar min_tp ysl 309 | act.append(sl_a) 310 | act.append(vol) 311 | act.append(dire) 312 | return act 313 | 314 | ## Evaluate all the steps on the simulation choosing in each step the best 315 | ## action, given the observations per tick. 
    ## \returns the final balance and the cumulative reward
    # \param max_ticks maximum number of simulation steps before stopping
    # Possible actions (model indices):
    # 0 = Buy/CloseSell/nopCloseBuy
    # 1 = Sell/CloseBuy/nopCloseSell
    # 2 = No Open Buy
    # 3 = No Open Sell
    def evaluate(self, max_ticks):
        # calculate the validation set score
        hist_scores = []
        # perform first observation
        observation = self.env_v.reset()
        #print("observation = ", observation)
        observation_prev = copy.deepcopy(observation)
        # action = nop
        action = []
        # initialize values for next order, dir: 1=buy, -1=sell, 0=nop
        dire = 0.0
        tp = 1.0
        sl = 1.0
        vol = 1.0
        score = 0.0
        step = 1
        order_status=0
        equity=[]
        balance=[]
        # Create the action list output [tp, sl, vol, dir]
        action.append(tp)
        action.append(sl)
        action.append(vol)
        action.append(dire)
        # do a second observation to be able to normalize the following observations
        observation, reward, done, info = self.env_v.step(action)
        order_status=info['order_status']
        equity.append(info['equity'])
        balance.append(info['balance'])

        #TODO: ERROR
        # normalize observation appended with its return obtained from previous and current observations
        # NOTE(review): this line uses the module-level `agent` instead of `self`,
        # so it only works when called from the __main__ block — confirm before reuse.
        normalized_observation = agent.normalize_observation(observation, observation_prev)
        #print("normalized_observation = ", normalized_observation)

        while 1:
            step += 1
            # if step > 2 do the rest, otherwise use a zero vector as the action
            observation_prev = copy.deepcopy(observation)
            # TODO: test; remove when the observations of agent_dcn and pretrainer match
            #if step > 1:
            #    #print("a=", raw_action[0], " order_status=",info['order_status'], " num_closes=", info['num_closes']," balance=",info['balance'], " equity=", info['equity'])
            #    print("observation")

            # outside the evaluation range (the first 3/4 of the ticks, or the last
            # obsticks ticks): send a no-op action instead of querying the model
            if (step < ((3*self.num_ticks)//4)+3) or (step > (self.vs_num_ticks-self.obsticks)):
                #print ("Skipping limits, step = ", step)
                # action = nop
                action = []
                # initialize values for next order, dir: 1=buy, -1=sell, 0=nop
                dire = 0.0
                tp = 1.0
                sl = 1.0
                vol = 1.0
                # Create the action list output [tp, sl, vol, dir]
                action.append(tp)
                action.append(sl)
                action.append(vol)
                action.append(dire)
            else:
                self.raw_action = self.decide_next_action(normalized_observation)
                action = self.transform_action(order_status)
                equity.append(info['equity'])
                balance.append(info['balance'])

            observation, reward, done, info = self.env_v.step(action)
            order_status=info['order_status']

            # TODO: plot balance and equity
            # keep the previous normalized observation while outside the evaluation range
            if (step < ((3*self.num_ticks)//4)+3) or (step > (self.vs_num_ticks-self.obsticks)):
                normalized_observation = normalized_observation
            else:
                normalized_observation = self.normalize_observation(observation, observation_prev)
            score += reward
            #env_v.render()
            if done or (step > max_ticks):
                break

        # TODO: skip the values read by the agent up to the first one of the validation set
        # TODO: export output csv with observations for the validation set; remove when
        #       pretrainer and agent_dcn have the same observations and actions
        out_obs_n = np.array(self.out_obs)
        print("out_obs_n.shape = ", out_obs_n.shape)
        with open('a_output_obs.csv' , 'w', newline='') as myfile:
            wr = csv.writer(myfile)
            wr.writerows(out_obs_n)
        # TODO: add action to the output csv file array; remove when pretrainer and
        #       agent_dcn have the same outputs and inputs
        print("Finished generating validation set observations.")
        # export output csv with actions for the validation set
        with open('a_output_act.csv' , 'w', newline='') as myfile:
            wr = csv.writer(myfile)
            wr.writerows(self.out_act)
        print("Finished generating validation set actions per observation.")
        lw = 2
        # plot the balance and equity curves and save the figure to disk
        y_rbf = balance
        y_v = equity
        x_seq = list(range(0, len(balance)))
        fig=plt.figure()
        plt.plot(x_seq, y_v, color='darkorange', label='Equity')
        plt.plot(x_seq, y_rbf, color='navy', lw=lw, label='Balance')
        plt.xlabel('tick')
        plt.ylabel('value')
        plt.title('Performance')
        plt.legend()
        fig.savefig('agent_test_8.png')
        #plt.show()

        hist_scores.append(score)
        avg_score = sum(hist_scores) / len(hist_scores)
        print("Validation Set Score = ", avg_score)
        print("*********************************************************")
        return info['balance'], avg_score

    ## Placeholder for future result reporting (currently does nothing).
    def show_results(self):
        test=0

# main function
if __name__ == '__main__':
    agent = QAgent()
    #agent.svr_rbf = agent.set_dcn_model()
    training_signal = 7
    agent.load_action_models(training_signal)
    scores = []
    balances = []
    for i in range(0, 1):
        print("Testing signal ",training_signal +i)
        agent.test_action = i
        agent.load_action_models(training_signal)
        balance,score = agent.evaluate(10000000)
        scores.append(score)
        balances.append(balance)
    print("Results:")
    # NOTE(review): prints "Signal 8+i" while training_signal is 7 — confirm which
    # numbering is intended.
    for i in range(0, 1):
        print("Signal ", 8+i, " balance=",balances[i], " score=",scores[i])