├── .gitignore ├── misc └── cover_img_neat.png ├── __pycache__ ├── NN.cpython-36.pyc ├── EPANN.cpython-36.pyc ├── Node.cpython-36.pyc ├── agent1.cpython-36.pyc ├── GymAgent.cpython-36.pyc ├── PopTests.cpython-36.pyc ├── Population.cpython-36.pyc ├── Walker_1D.cpython-36.pyc ├── CartPoleAgent.cpython-36.pyc ├── CartpoleAgent.cpython-36.pyc ├── PendulumAgent.cpython-36.pyc ├── movie_combine.cpython-36.pyc ├── profileOutput.cpython-36.pyc ├── LunarLanderAgent.cpython-36.pyc └── PuckworldAgent.cpython-36.pyc ├── scrap_old_test ├── networkx_test.py ├── Walker_1D.py ├── LunarLanderAgent.py ├── CartPoleAgent.py ├── PendulumAgent.py ├── PuckworldAgent.py └── agent1.py ├── createEnvJson.py ├── gym_env_info.json ├── README.md ├── ev1.py ├── ablation_test.py ├── movie_combine.py └── classes ├── GymAgent.py ├── Node.py ├── FileSystemTools.py ├── PopTests.py ├── Population.py └── EPANN.py /.gitignore: -------------------------------------------------------------------------------- 1 | misc_runs/ 2 | save_runs/ 3 | scrap_old_test* 4 | misc_runs* 5 | save_runs* 6 | 7 | -------------------------------------------------------------------------------- /misc/cover_img_neat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/misc/cover_img_neat.png -------------------------------------------------------------------------------- /__pycache__/NN.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/NN.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/EPANN.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/EPANN.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/Node.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Node.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/agent1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/agent1.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/GymAgent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/GymAgent.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/PopTests.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PopTests.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/Population.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Population.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/Walker_1D.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Walker_1D.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/CartPoleAgent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/CartPoleAgent.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/CartpoleAgent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/CartpoleAgent.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/PendulumAgent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PendulumAgent.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/movie_combine.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/movie_combine.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/profileOutput.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/profileOutput.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/LunarLanderAgent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/LunarLanderAgent.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/PuckworldAgent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PuckworldAgent.cpython-36.pyc -------------------------------------------------------------------------------- /scrap_old_test/networkx_test.py: -------------------------------------------------------------------------------- 1 | 2 | from EPANN import EPANN 3 | from LunarLanderAgent import LunarLanderAgent 4 | 5 | 6 | e = EPANN(agent_class=LunarLanderAgent, render_type='gym', N_init_hidden_nodes=0, init_IO_weights=True) 7 | 8 | e.plotNetwork(show_plot=True, node_legend=True) 9 | 10 | 11 | 12 | 13 | 14 | 15 | # 16 | -------------------------------------------------------------------------------- /createEnvJson.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./classes') 3 | import json 4 | 5 | env_info_dict = { 6 | 7 | 'Pendulum' : { 8 | 'gym_env_name' : 'Pendulum-v0', 9 | 'state_labels' : ['cos(ang)', 'sin(ang)', 'ang_vel'], 10 | 'action_labels' : ['torque'], 11 | 'action_space_type' : 'continuous', 12 | 'max_episode_steps' : 200 13 | }, 14 | 15 | 'LunarLander' : { 16 | 'gym_env_name' : 'LunarLander-v2', 17 | 'state_labels' : ['pos_x', 'pos_y', 'v_x', 'v_y', 'angle', 'v_ang'], 18 | 'action_labels' : ['nothing', 'engine_L', 'engine_main', 'engine_R'], 19 | 'action_space_type' : 'discrete', 20 | 
'max_episode_steps' : 500 21 | }, 22 | 23 | 'CartPole' : { 24 | 'gym_env_name' : 'CartPole-v0', 25 | 'state_labels' : ['pos_cart', 'v_cart','pole_angle', 'v_poletip'], 26 | 'action_labels' : ['cart_L', 'cart_R',], 27 | 'action_space_type' : 'discrete', 28 | 'max_episode_steps' : 200 29 | }, 30 | 31 | 32 | } 33 | 34 | 35 | fname = 'gym_env_info.json' 36 | 37 | with open(fname, 'w') as outfile: 38 | json.dump(env_info_dict, outfile, indent=4) 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | # 49 | -------------------------------------------------------------------------------- /gym_env_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "Pendulum": { 3 | "gym_env_name": "Pendulum-v0", 4 | "state_labels": [ 5 | "cos(ang)", 6 | "sin(ang)", 7 | "ang_vel" 8 | ], 9 | "action_labels": [ 10 | "torque" 11 | ], 12 | "action_space_type": "continuous", 13 | "max_episode_steps": 200 14 | }, 15 | "LunarLander": { 16 | "gym_env_name": "LunarLander-v2", 17 | "state_labels": [ 18 | "pos_x", 19 | "pos_y", 20 | "v_x", 21 | "v_y", 22 | "angle", 23 | "v_ang" 24 | ], 25 | "action_labels": [ 26 | "nothing", 27 | "engine_L", 28 | "engine_main", 29 | "engine_R" 30 | ], 31 | "action_space_type": "discrete", 32 | "max_episode_steps": 500 33 | }, 34 | "CartPole": { 35 | "gym_env_name": "CartPole-v0", 36 | "state_labels": [ 37 | "pos_cart", 38 | "v_cart", 39 | "pole_angle", 40 | "v_poletip" 41 | ], 42 | "action_labels": [ 43 | "cart_L", 44 | "cart_R" 45 | ], 46 | "action_space_type": "discrete", 47 | "max_episode_steps": 200 48 | } 49 | } -------------------------------------------------------------------------------- /scrap_old_test/Walker_1D.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | class Walker_1D: 6 | 7 | 8 | def __init__(self): 9 | 10 | self.lims = np.array([-1.0, 1.0]) 11 | self.width = self.lims[1] - self.lims[0] 12 | 13 | self.step_size = self.width/50.0 14 | 15 | self.position = 0 16 | self.target_position = None 17 | 18 | self.N_state_terms = len(self.getStateVec()) 19 | self.N_actions = 2 20 | 21 | 22 | 23 | def getStateVec(self): 24 | return(np.array([self.position, self.target_position])) 25 | 26 | 27 | def initEpisode(self): 28 | self.resetPosition() 29 | self.resetTarget() 30 | 31 | 32 | def resetTarget(self): 33 | 34 | x = np.random.random() 35 | self.target_position = self.lims[0] + self.width*x 36 | # print('new target pos: {:.3f}'.format(self.target_position)) 37 | 38 | 39 | def resetPosition(self): 40 | self.position = 0 41 | 42 | 43 | def iterate(self, action): 44 | # Action 0 is go L, action 1 is go R. 
45 | add_x = (action - 0.5)*2 46 | # maps 0,1 to -1,1 47 | self.position += add_x*self.step_size 48 | self.position = max(self.position, self.lims[0] + self.step_size) 49 | self.position = min(self.position, self.lims[1] - self.step_size) 50 | return(self.reward(), self.getStateVec(), False) 51 | 52 | 53 | 54 | def reward(self): 55 | 56 | if abs(self.position - self.target_position) <= 1.2*self.step_size: 57 | self.resetTarget() 58 | return(1.0) 59 | else: 60 | return(-0.01) 61 | 62 | 63 | 64 | def drawState(self, ax): 65 | 66 | ax.clear() 67 | ax.set_xlim(tuple(self.lims)) 68 | ax.set_ylim(tuple(self.lims)) 69 | 70 | ax.set_xlabel('x') 71 | ax.set_ylabel('y') 72 | ax.set_aspect('equal') 73 | 74 | ag = plt.Circle((self.position, 0), 0.03, color='tomato') 75 | ax.add_artist(ag) 76 | 77 | if self.target_position is not None: 78 | target = plt.Circle((self.target_position, 0), 0.03, color='seagreen') 79 | ax.add_artist(target) 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | # 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Experiments with playing OpenAI games with NEAT 3 | ========================================== 4 | 5 |
<p align="center"><img src="misc/cover_img_neat.png"></p>
6 | 7 |

8 | 9 | Overview 10 | -------------------------------- 11 | 12 | This project uses Kenneth Stanley's popular [NEAT framework](https://en.wikipedia.org/wiki/Neuroevolution_of_augmenting_topologies) to evolve neural networks to play OpenAI gym games. `Node` objects are the basic unit of `EPANN` objects, a collection of which forms the `Population` object. Tests comparing different population parameters can be done with `PopTests.py`. 13 | 14 | 15 | Main scripts 16 | ------------------------------- 17 | 18 | These are the scripts I run; they use the classes described below and live in the main dir. A brief description: 19 | 20 | * `ev1.py` - Creates a `Population` object for a given agent class and evolves the population. 21 | * `ablation_test.py` - Takes a fully formed network and repeatedly removes a connection, starting from the smallest connection weight and working up, re-evaluating the network's FF (fitness function) after each removal to find the critical connections. 22 | * `movie_combine.py` - Combines several movie files into a single movie file in a grid format. 23 | * `createEnvJson.py` - Writes a dict of info about the gym envs (or other envs) to a JSON file, to use for labels, etc. 24 | 25 | 26 | Classes 27 | -------------------------------- 28 | 29 | * `Node.py` - The basic unit of the network. Can be set to be an input, output, or bias node. 30 | * `EPANN.py` - The network. It starts from just input, bias, and output nodes; the number of I/O nodes is based on the number of inputs/outputs of the agent class being simulated. Nodes and connections are then added and removed via mutations. It is also responsible for running an episode of the agent class to evaluate the FF. 31 | * `Population.py` - This creates a population of `EPANN` objects, repeatedly evaluates and sorts them by their FFs, and then keeps and mutates the best of the population. This is all done via its `evolve()` function. 32 | * `PopTests.py` - This runs several evolutions of `Population` objects with different parameters, to compare how population parameters (trading off `N_gen` vs. `N_pop`, for example) affect evolution. 33 | * `GymAgent.py` - This is a generic wrapper class for `gym` agents. 34 | * `FileSystemTools.py` - A small collection of custom functions that I found myself using often, mostly string formatting and file/path handling. 
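
Example usage
--------------------------------

A minimal sketch of a typical run, adapted from the `Population` call in `ev1.py` (the class names and parameter values below just mirror that script):

```python
import sys
sys.path.append('./classes')
from Population import Population
from GymAgent import GymAgent

# Evolve a population of 64 networks on the CartPole gym env.
p1 = Population(agent_class=GymAgent, env_name='CartPole', N_pop=64,
                mut_type='change_topo', std=1.0, render_type='gym')
p1.evolve(N_gen=128, N_episode_steps=200, N_trials_per_agent=2,
          N_runs_with_best=2, record_final_runs=False, show_final_runs=False)
```

`movie_combine.py` is run from the command line instead, e.g. `python movie_combine.py <dir_of_movies> --grid_size 2x2 --gif`, which tiles the first four movie files found in that dir into a 2x2 grid (see its `argparse` block for the flags).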
35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | # 45 | -------------------------------------------------------------------------------- /ev1.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./classes') 3 | from EPANN import EPANN 4 | from Population import Population 5 | import RunTools as rt 6 | from GymAgent import GymAgent 7 | from time import time 8 | import numpy as np 9 | 10 | ea = EPANN(agent_class=GymAgent, env_name='CartPole') 11 | 12 | #ea.plotNetwork() 13 | 14 | ea.addConnectingWeight((0,4)) 15 | ea.addConnectingWeight((1,4)) 16 | ea.addNodeInBetween(1,4) 17 | 18 | ea.addConnectingWeight((2,5)) 19 | ea.addNode() 20 | ea.addConnectingWeight((2,6)) 21 | ea.addConnectingWeight((3,4)) 22 | ea.addConnectingWeight((3,5)) 23 | ea.addConnectingWeight((6,5)) 24 | #ea.addAtomInBetween((2,5)) 25 | 26 | 27 | N_tests = 100000 28 | 29 | inputs = np.random.random((N_tests, 4)) 30 | 31 | st = time() 32 | for i in range(N_tests): 33 | 34 | ea.forwardPass(inputs[i]) 35 | 36 | 37 | print('time elapsed:', time() - st) 38 | 39 | #ea.plotNetwork() 40 | 41 | 42 | 43 | 44 | exit() 45 | 46 | 47 | p1 = Population(agent_class=GymAgent, env_name='CartPole', N_pop=64, mut_type='change_topo', std=1.0, render_type='gym') 48 | 49 | p1.evolve(N_gen=128, N_episode_steps=200, N_trials_per_agent=2, N_runs_with_best=2, record_final_runs=False, show_final_runs=False) 50 | 51 | exit(0) 52 | 53 | 54 | 55 | 56 | evolve_params = { 57 | 'N_runs' : 3, 58 | 'agent_class' : GymAgent, 59 | 'env_name' : 'LunarLander', 60 | 'N_pop' : 64, 61 | 'mut_type' : 'change_topo', 62 | 'std' : [0.01, 0.1, 1, 10], 63 | 'N_gen' : 256, 64 | 'N_trials_per_agent' : 2, 65 | 'N_runs_with_best' : 9, 66 | 'record_final_runs' : True, 67 | 'show_final_runs' : False 68 | } 69 | 70 | 71 | rt.varyParam(object_class=Population, run_fn=Population.evolve, run_result_var='best_individ_avg_score', **evolve_params) 72 | 73 | exit() 74 | 75 | 76 | 77 | 78 | evolve_params = { 79 | 'N_runs' : 3, 80 | 'agent_class' : GymAgent, 81 | 'env_name' : 'LunarLander', 82 | 'N_pop' : 64, 83 | 'mut_type' : 'change_topo', 84 | 'std' : [0.01, 0.1, 1.0, 10.0], 85 | 'N_gen' : 256, 86 | 'N_trials_per_agent' : 2, 87 | 'N_runs_with_best' : 9, 88 | 'record_final_runs' : True, 89 | 'show_final_runs' : False 90 | } 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | e = EPANN(agent_class=PendulumAgent) 101 | 102 | e.loadNetworkFromFile( 103 | '/home/declan/Documents/code/evo1/misc_runs/evolve_22-01-2019_18-01-04__PendulumAgent' + 104 | '/' + 'bestNN_PendulumAgent_22-01-2019_18-01-04' + '.json' 105 | ) 106 | 107 | exit(0) 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | # 116 | -------------------------------------------------------------------------------- /scrap_old_test/LunarLanderAgent.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import gym 4 | from gym import wrappers 5 | import FileSystemTools as fst 6 | 7 | ''' 8 | 9 | need to provide: 10 | 11 | --state labels (for each state var) 12 | --action labels (for each action var) 13 | --N_state_terms 14 | --N_actions 15 | 16 | functions: 17 | 18 | --getStateVec() 19 | --initEpisode() 20 | --iterate() (returns a tuple of (reward, state, boolean isDone)) 21 | 22 | ''' 23 | 24 | 25 | 26 | class LunarLanderAgent: 27 | 28 | 29 | def __init__(self, **kwargs): 30 | 31 | self.env = gym.make('LunarLander-v2') 32 | gym.logger.set_level(40) 33 | self.state_labels = 
['pos_x', 'pos_y', 'v_x', 'v_y', 'angle', 'v_ang'] 34 | self.action_labels = ['nothing', 'engine_L', 'engine_main', 'engine_R',] 35 | # Last two states are whether the legs are touching the ground or not. 36 | # I'm not including them here. 37 | self.N_state_terms = 6 38 | self.N_actions = self.env.action_space.n 39 | self.action_space_type = 'discrete' 40 | self.state = self.env.reset() 41 | dt = fst.getDateString() 42 | self.base_name = f'LunarLander_{dt}' 43 | self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/') 44 | self.monitor_is_on = False 45 | 46 | 47 | 48 | 49 | def setMonitorOn(self): 50 | # It seems like when I call this, it gives a warning about the env not being 51 | # made with gym.make (which it is...), but if I call it only once for the same 52 | # agent, it doesn't run it every time I call it? 53 | #if not self.monitor_is_on: 54 | # 55 | # Also, it seems like you can't record the episode without showing it on the screen. 56 | # See https://github.com/openai/gym/issues/347 maybe? 57 | if True: 58 | self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name) 59 | self.env = wrappers.Monitor(self.env, self.record_dir) 60 | self.monitor_is_on = True 61 | 62 | 63 | def getStateVec(self): 64 | return(self.state[:self.N_state_terms]) 65 | 66 | 67 | def initEpisode(self): 68 | self.state = self.env.reset() 69 | 70 | 71 | def iterate(self, action): 72 | # Actions: 0 = do nothing, 1 = fire left engine, 2 = fire main engine, 3 = fire right engine. 73 | observation, reward, done, info = self.env.step(action) 74 | self.state = observation 75 | 76 | return(reward, self.state, done) 77 | 78 | 79 | 80 | 81 | 82 | 83 | def drawState(self): 84 | 85 | self.env.render() 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | # 94 | -------------------------------------------------------------------------------- /scrap_old_test/CartPoleAgent.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import gym 4 | from gym import wrappers 5 | import FileSystemTools as fst 6 | 7 | ''' 8 | 9 | need to provide: 10 | 11 | --state labels (for each state var) 12 | --action labels (for each action var) 13 | --N_state_terms 14 | --N_actions 15 | --action_space_type 16 | 17 | functions: 18 | 19 | --getStateVec() 20 | --initEpisode() 21 | --iterate() (returns a tuple of (reward, state, boolean isDone)) 22 | 23 | ''' 24 | 25 | 26 | 27 | class CartPoleAgent: 28 | 29 | 30 | def __init__(self, **kwargs): 31 | 32 | self.env = gym.make('CartPole-v0') 33 | gym.logger.set_level(40) 34 | self.state_labels = ['pos_cart', 'v_cart','pole_angle', 'v_poletip'] 35 | self.action_labels = ['cart_L', 'cart_R',] 36 | # CartPole's state is (cart position, cart velocity, pole angle, pole-tip velocity), 37 | # so all four state terms are used here. 
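# env.reset() returns the initial observation, so its length gives the number of state terms.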
38 | self.N_state_terms = len(self.env.reset()) 39 | self.N_actions = self.env.action_space.n 40 | self.action_space_type = 'discrete' 41 | self.state = self.env.reset() 42 | dt = fst.getDateString() 43 | self.base_name = f'CartPole_{dt}' 44 | self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/') 45 | self.monitor_is_on = False 46 | 47 | 48 | def setMaxEpisodeSteps(self, N_steps): 49 | 50 | self.env._max_episode_steps = N_steps 51 | self.env.spec.max_episode_steps = N_steps 52 | self.env.spec.timestep_limit = N_steps 53 | 54 | 55 | def setMonitorOn(self): 56 | # It seems like when I call this, it gives a warning about the env not being 57 | # made with gym.make (which it is...), but if I call it only once for the same 58 | # agent, it doesn't run it every time I call it? 59 | #if not self.monitor_is_on: 60 | # 61 | # Also, it seems like you can't record the episode without showing it on the screen. 62 | # See https://github.com/openai/gym/issues/347 maybe? 63 | if True: 64 | self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name) 65 | self.env = wrappers.Monitor(self.env, self.record_dir) 66 | self.monitor_is_on = True 67 | 68 | 69 | def getStateVec(self): 70 | return(self.state[:self.N_state_terms]) 71 | 72 | 73 | def initEpisode(self): 74 | self.state = self.env.reset() 75 | 76 | 77 | def iterate(self, action): 78 | # Action 0 is go L, action 1 is go R. 79 | observation, reward, done, info = self.env.step(action) 80 | self.state = observation 81 | 82 | return(reward, self.state, done) 83 | 84 | 85 | 86 | 87 | 88 | 89 | def drawState(self): 90 | 91 | self.env.render() 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | # 100 | -------------------------------------------------------------------------------- /scrap_old_test/PendulumAgent.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import gym 4 | from gym import wrappers 5 | import FileSystemTools as fst 6 | 7 | ''' 8 | 9 | need to provide: 10 | 11 | --state labels (for each state var) 12 | --action labels (for each action var) 13 | --N_state_terms 14 | --N_actions 15 | 16 | functions: 17 | 18 | --getStateVec() 19 | --initEpisode() 20 | --iterate() (returns a tuple of (reward, state, boolean isDone)) 21 | --setMaxEpisodeSteps() 22 | 23 | ''' 24 | 25 | 26 | 27 | class PendulumAgent: 28 | 29 | 30 | def __init__(self, **kwargs): 31 | 32 | self.env = gym.make('Pendulum-v0') 33 | gym.logger.set_level(40) 34 | self.state_labels = ['cos(ang)', 'sin(ang)', 'ang_vel'] 35 | self.action_labels = ['torque'] 36 | # Last two states are whether the legs are touching the ground or not. 37 | # I'm not including them here. 38 | self.N_state_terms = len(self.env.reset()) 39 | self.N_actions = 1 40 | self.action_space_type = 'continuous' 41 | self.state = self.env.reset() 42 | dt = fst.getDateString() 43 | self.base_name = f'Pendulum_{dt}' 44 | self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/') 45 | self.monitor_is_on = False 46 | 47 | 48 | 49 | def setMaxEpisodeSteps(self, N_steps): 50 | 51 | self.env._max_episode_steps = N_steps 52 | self.env.spec.max_episode_steps = N_steps 53 | self.env.spec.timestep_limit = N_steps 54 | 55 | 56 | def closeEnv(self): 57 | # This doesn't seem to be a good idea to use with monitor? 
58 | self.env.close() 59 | 60 | 61 | def setMonitorOn(self, show_run=True): 62 | # It seems like when I call this, it gives a warning about the env not being 63 | # made with gym.make (which it is...), but if I call it only once for the same 64 | # agent, it doesn't run it every time I call it? 65 | #if not self.monitor_is_on: 66 | # 67 | # Also, it seems like you can't record the episode without showing it on the screen. 68 | # See https://github.com/openai/gym/issues/347 maybe? 69 | if True: 70 | self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name) 71 | if show_run: 72 | self.env = wrappers.Monitor(self.env, self.record_dir) 73 | else: 74 | self.env = wrappers.Monitor(self.env, self.record_dir, video_callable=False, force=True) 75 | self.monitor_is_on = True 76 | 77 | 78 | def getStateVec(self): 79 | return(self.state[:self.N_state_terms]) 80 | 81 | 82 | def initEpisode(self): 83 | self.state = self.env.reset() 84 | 85 | 86 | def iterate(self, action): 87 | # The action here is a single continuous torque value, not a discrete choice. 88 | observation, reward, done, info = self.env.step(action) 89 | self.state = observation 90 | 91 | return(reward, self.state, done) 92 | 93 | 94 | 95 | 96 | 97 | 98 | def drawState(self): 99 | 100 | self.env.render() 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | # 109 | -------------------------------------------------------------------------------- /ablation_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./classes') 3 | from EPANN import EPANN 4 | from GymAgent import GymAgent 5 | import matplotlib.pyplot as plt 6 | import FileSystemTools as fst 7 | import numpy as np 8 | 9 | params = {} 10 | params['env_name'] = 'Pendulum' 11 | 12 | e = EPANN(agent_class=GymAgent, env_name=params['env_name']) 13 | 14 | path = '/home/declan/Documents/code/evo1/save_runs/evolve_23-01-2019_18-45-18__GymAgentPendulum_good' 15 | params['NN_file'] = fst.combineDirAndFile(path, 'bestNN_GymAgent_23-01-2019_18-45-18' + '.json') 16 | 17 | e.loadNetworkFromFile(params['NN_file']) 18 | 19 | datetime_str = fst.getDateString() 20 | dir = fst.combineDirAndFile('misc_runs', 'ablation_{}_{}'.format(datetime_str, params['env_name'])) 21 | fst.makeDir(dir) 22 | plot_dir = fst.makeDir(fst.combineDirAndFile(dir, 'plots')) 23 | 24 | log_output_str = '' 25 | 26 | params['N_runs_per_NN'] = 50 27 | params['N_episode_steps'] = e.agent.max_episode_steps 28 | 29 | params['N_weights_to_remove'] = len(e.weights_list) 30 | 31 | ablation_FF_mean_std = [] 32 | 33 | for w_removed in range(params['N_weights_to_remove']): 34 | 35 | # Run the ablation several times to get stats 36 | ablation_scores = [] 37 | for run in range(params['N_runs_per_NN']): 38 | ablation_scores.append(e.runEpisode(params['N_episode_steps'])) 39 | 40 | # Add the mean and std 41 | ablation_FF_mean_std.append([w_removed, np.mean(ablation_scores), np.std(ablation_scores)]) 42 | 43 | # Save what the NN currently looks like 44 | NN_save_fname = fst.combineDirAndFile(plot_dir, 'NN_plot_{}w_removed.png'.format(w_removed)) 45 | e.plotNetwork(show_plot=False, save_plot=True, fname=NN_save_fname, node_legend=True) 46 | 47 | # Remove the next smallest weight 48 | smallest_weight_connection = min(e.weights_dict, key=lambda x: abs(e.weights_dict.get(x))) 49 | remove_str = 'Removing weight {} that has value {:.3f}\n'.format(smallest_weight_connection, e.weights_dict[smallest_weight_connection]) 50 | print(remove_str) 51 | log_output_str += remove_str 52 | 
e.removeConnectingWeight(smallest_weight_connection) 53 | 54 | 55 | 56 | # Save params 57 | fst.writeDictToFile(params, fst.combineDirAndFile(dir, 'Params_logfile_{}.log'.format(datetime_str))) 58 | 59 | # Save weight removal order 60 | removal_log_fname = fst.combineDirAndFile(dir, 'Weight_remove_order_{}.txt'.format(datetime_str)) 61 | with open(removal_log_fname, 'w+') as f: 62 | f.write(log_output_str) 63 | 64 | # Plot the mean and std FF as a function of removing weights 65 | ablation_FF_mean_std = np.array(ablation_FF_mean_std) 66 | weights_removed = ablation_FF_mean_std[:, 0] 67 | FF_mean = ablation_FF_mean_std[:, 1] 68 | FF_std = ablation_FF_mean_std[:, 2] 69 | plt.fill_between( 70 | np.array(range(len(FF_mean))), 71 | FF_mean - FF_std, 72 | FF_mean + FF_std, 73 | facecolor='dodgerblue', alpha=0.5) 74 | 75 | plt.plot(FF_mean, color='mediumblue') 76 | plt.xlabel('# weights removed') 77 | plt.ylabel('FF') 78 | plt.title('Ablation test, FF over {} episodes each'.format(params['N_runs_per_NN'])) 79 | fname = fst.combineDirAndFile(dir, '{}_{}.png'.format('ablation_FF_mean-std_plot', datetime_str)) 80 | plt.savefig(fname) 81 | 82 | # Save mean/std 83 | fname = fst.combineDirAndFile(dir, '{}_{}.txt'.format('ablation_FF_mean-std', datetime_str)) 84 | np.savetxt(fname, ablation_FF_mean_std, fmt='%.4f') 85 | 86 | 87 | 88 | 89 | # 90 | -------------------------------------------------------------------------------- /movie_combine.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./classes') 3 | from moviepy.editor import VideoFileClip, clips_array, vfx 4 | import argparse 5 | import FileSystemTools as fst 6 | import glob 7 | import subprocess 8 | import os 9 | 10 | 11 | def combineMovieFiles(**kwargs): 12 | 13 | path = kwargs.get('path', None) 14 | file_type = kwargs.get('file_type', 'mp4') 15 | grid_size = kwargs.get('grid_size', '1x1') 16 | make_gif = kwargs.get('make_gif', True) 17 | 18 | # get the files with the video clip extension type 19 | file_list = glob.glob(fst.addTrailingSlashIfNeeded(path) + '*' + file_type) 20 | print('{} files of type {} found'.format(len(file_list), file_type)) 21 | 22 | # make sure you've passed a grid size argument 23 | assert grid_size != '0', 'need to provide a grid_size arg of form HxW, e.g., 2x2' 24 | 25 | try: 26 | grid_dims = [int(y) for y in grid_size.split('x')] 27 | grid_height, grid_width = grid_dims[0], grid_dims[1] 28 | N_movie_panels = grid_height*grid_width 29 | print('need {} movie files for a grid of size {}'.format(N_movie_panels, grid_size)) 30 | except: 31 | print('something wrong with grid_size argument, should be of form 5x8 (or similar)') 32 | exit() 33 | 34 | # take only the first N video files, no choosing process. It will use ones created 35 | # from running this program previously if they're there, so be careful. 
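# e.g., with grid_size '2x2', N_movie_panels is 4 and only the first 4 clips found are kept.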
36 | files_used = file_list[:N_movie_panels] 37 | 38 | clip_list = [] 39 | clip_matrix = [] 40 | 41 | # create a list of the video file clip objects, with a small margin around each 42 | for f in files_used: 43 | clip1 = VideoFileClip(f).margin(10) 44 | #clip1 = clip1.resize(0.50) 45 | clip_list.append(clip1) 46 | 47 | # put them into a list of lists, ie, a matrix, in the shape you want them to finally be 48 | for y in range(grid_height): 49 | temp_list = [] 50 | for x in range(grid_width): 51 | temp_list.append(clip_list[y*grid_width + x]) 52 | 53 | clip_matrix.append(temp_list) 54 | 55 | print('size of clip_matrix:', len(clip_matrix), len(clip_matrix[0])) 56 | final_clip = clips_array(clip_matrix) # put the clips side by side 57 | 58 | # fname stuff 59 | dt_string = fst.getDateString() 60 | base_fname = 'COMBINED_{}_{}'.format(grid_size, dt_string) 61 | movie_output_fname = fst.combineDirAndFile(path, '{}.{}'.format(base_fname, file_type)) 62 | 63 | final_clip.write_videofile(movie_output_fname) # create the combined video file! 64 | 65 | if make_gif: 66 | 67 | px_size = 1260 68 | fps = 30 69 | 70 | gif_output_fname = fst.combineDirAndFile(path, '{}.gif'.format(base_fname)) 71 | 72 | palette_fname = 'palette.png' 73 | 74 | create_palette_cmd = 'ffmpeg -y -i {} -vf fps={},scale={}:-1:flags=lanczos,palettegen {}'.format(movie_output_fname, fps, px_size, palette_fname) 75 | create_gif_cmd = 'ffmpeg -i {} -i {} -filter_complex "fps={},scale={}:-1:flags=lanczos[x];[x][1:v]paletteuse" {}'.format(movie_output_fname, palette_fname, fps, px_size, gif_output_fname) 76 | 77 | os.system(create_palette_cmd) 78 | os.system(create_gif_cmd) 79 | 80 | remove_palette_cmd = f'rm {palette_fname}' 81 | remove_movie_cmd = f'rm {movie_output_fname}' 82 | 83 | os.system(remove_palette_cmd) 84 | os.system(remove_movie_cmd) 85 | 86 | 87 | if __name__ == '__main__': 88 | 89 | # arguments to be read in via CLI 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument('path') 92 | parser.add_argument('--grid_size', default='0') 93 | parser.add_argument('--file_type', default='mp4') 94 | parser.add_argument('--gif', action='store_true', default=False) 95 | args = parser.parse_args() 96 | 97 | kwargs = {} 98 | kwargs['path'] = args.path 99 | kwargs['file_type'] = args.file_type 100 | kwargs['grid_size'] = args.grid_size 101 | kwargs['make_gif'] = args.gif 102 | 103 | combineMovieFiles(**kwargs) 104 | 105 | 106 | 107 | 108 | 109 | # 110 | -------------------------------------------------------------------------------- /classes/GymAgent.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./classes') 3 | import numpy as np 4 | import gym 5 | from gym import wrappers 6 | import FileSystemTools as fst 7 | import json 8 | 9 | 10 | ''' 11 | 12 | This is a generalized agent for OpenAI gym environments. 13 | I'm doing it because I had to create diff. agents for each environment, 14 | when they really just need a few specific things for each. So now you just pass it 15 | env_name in the kwargs, and it will look up in a json file the right stuff. 16 | 17 | Here, the env_name you pass it will be something like 'Pendulum', not Pendulum-v0, 18 | because I don't want to have to deal with remembering versions. 19 | 20 | See createEnvJson.py and loadEnvJson() for details. 
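For example, GymAgent(env_name='CartPole') looks up the 'CartPole' entry in gym_env_info.json
to get its labels, action space type, and max episode steps.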
21 | 22 | need to provide: 23 | 24 | --state labels (for each state var) 25 | --action labels (for each action var) 26 | --gym_env_name 27 | --action_space_type 28 | 29 | functions: 30 | 31 | --getStateVec() 32 | --initEpisode() 33 | --iterate() (returns a tuple of (reward, state, boolean isDone)) 34 | --setMaxEpisodeSteps() 35 | 36 | ''' 37 | 38 | 39 | 40 | class GymAgent: 41 | 42 | 43 | def __init__(self, **kwargs): 44 | 45 | self.env_name = kwargs.get('env_name', None) 46 | assert self.env_name is not None, 'Need to provide an env_name argument!' 47 | 48 | # Load all the properties for this env. 49 | self.loadEnvJson(self.env_name) 50 | # Create the env 51 | self.env = gym.make(self.gym_env_name) 52 | self.setMaxEpisodeSteps(self.max_episode_steps) 53 | gym.logger.set_level(40) 54 | 55 | self.state = self.env.reset() # Should I be doing this here? sometimes trouble with resetting when done=False 56 | dt = fst.getDateString() 57 | self.base_name = f'{self.env_name}_{dt}' 58 | self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/') 59 | self.monitor_is_on = False 60 | 61 | 62 | def setMaxEpisodeSteps(self, N_steps): 63 | 64 | self.env._max_episode_steps = N_steps 65 | self.env.spec.max_episode_steps = N_steps 66 | self.env.spec.timestep_limit = N_steps 67 | 68 | 69 | def closeEnv(self): 70 | # This doesn't seem to be a good idea to use with monitor? 71 | self.env.close() 72 | #self.env.render(close=True) 73 | 74 | 75 | def setMonitorOn(self, show_run=True): 76 | # It seems like when I call this, it gives a warning about the env not being 77 | # made with gym.make (which it is...), but if I call it only once for the same 78 | # agent, it doesn't run it every time I call it? 79 | #if not self.monitor_is_on: 80 | # 81 | # Also, it seems like you can't record the episode without showing it on the screen. 82 | # See https://github.com/openai/gym/issues/347 maybe? 83 | 84 | self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name) 85 | #if show_run: 86 | if True: 87 | self.env = wrappers.Monitor(self.env, self.record_dir) 88 | else: 89 | self.env = wrappers.Monitor(self.env, self.record_dir, video_callable=False, force=True) 90 | self.monitor_is_on = True 91 | 92 | 93 | def getStateVec(self): 94 | return(self.state[:self.N_state_terms]) 95 | 96 | 97 | def initEpisode(self): 98 | self.state = self.env.reset() 99 | 100 | 101 | def iterate(self, action): 102 | # The action passed in is a discrete action index or a continuous action value, depending on action_space_type. 
103 | observation, reward, done, info = self.env.step(action) 104 | self.state = observation 105 | 106 | return(reward, self.state, done) 107 | 108 | 109 | 110 | def drawState(self): 111 | self.env.render() 112 | 113 | 114 | def loadEnvJson(self, env_name): 115 | 116 | with open('gym_env_info.json') as json_file: 117 | env_info_dict = json.load(json_file) 118 | 119 | env_info = env_info_dict[env_name] 120 | 121 | self.gym_env_name = env_info['gym_env_name'] 122 | self.state_labels = env_info['state_labels'] 123 | self.action_labels = env_info['action_labels'] 124 | self.action_space_type = env_info['action_space_type'] 125 | self.max_episode_steps = env_info['max_episode_steps'] 126 | self.N_state_terms = len(self.state_labels) 127 | self.N_actions = len(self.action_labels) 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | # 136 | -------------------------------------------------------------------------------- /classes/Node.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./classes') 3 | import numpy as np 4 | from math import exp, tanh 5 | from copy import copy 6 | 7 | 8 | class Node: 9 | 10 | def __init__(self, node_index): 11 | 12 | self.is_input_node = False 13 | self.is_output_node = False 14 | self.is_bias_node = False 15 | self.is_memory_node = False 16 | 17 | self.node_index = node_index 18 | 19 | self.input_indices = [] 20 | 21 | self.inputs_received = [] 22 | 23 | self.output_weights = {} 24 | 25 | self.value = None 26 | 27 | 28 | def setToInputNode(self): 29 | self.is_input_node = True 30 | 31 | 32 | def setToOutputNode(self): 33 | self.is_output_node = True 34 | 35 | 36 | def setToBiasNode(self): 37 | self.is_bias_node = True 38 | self.value = 1.0 39 | 40 | 41 | def setToMemoryNode(self): 42 | self.is_memory_node = True 43 | self.value = 0.0 44 | 45 | 46 | def getValue(self): 47 | 48 | if self.value is not None: 49 | return(self.value) 50 | else: 51 | if self.is_output_node: 52 | tot = sum(self.inputs_received) 53 | self.value = tot 54 | return(self.value) 55 | elif self.is_bias_node: 56 | pass 57 | elif self.is_memory_node: 58 | pass 59 | elif self.is_input_node: 60 | # For now, I'm just gonna set the input nodes directly via the .output value. 61 | return(self.value) 62 | else: 63 | tot = sum(self.inputs_received) 64 | self.value = self.nonlinear(tot) 65 | return(self.value) 66 | 67 | 68 | 69 | def calculateNodeValue(self): 70 | if self.is_output_node: 71 | tot = sum(self.inputs_received.values()) 72 | self.value = tot 73 | elif self.is_bias_node: 74 | pass 75 | elif self.is_memory_node: 76 | pass 77 | elif self.is_input_node: 78 | # For now, I'm just gonna set the input nodes directly via the .output value. 
79 | pass 80 | else: 81 | tot = sum(self.inputs_received.values()) 82 | self.value = self.nonlinear(tot) 83 | 84 | 85 | def clearInputs(self): 86 | if not self.is_input_node: 87 | self.inputs_received = [] 88 | 89 | 90 | def clearNode(self): 91 | self.clearInputs() 92 | self.value = None 93 | 94 | 95 | def setRandomOutputWeights(self): 96 | weights = np.random.normal(size=self.getNOutputs(), scale=0.1) 97 | self.output_weights = dict(zip(self.getOutputIndices(), weights)) 98 | 99 | 100 | def removeFromInputIndices(self, ind): 101 | self.input_indices.remove(ind) 102 | 103 | def removeFromOutputWeights(self, ind): 104 | del self.output_weights[ind] 105 | 106 | def addToInputIndices(self, ind): 107 | self.input_indices.append(ind) 108 | 109 | 110 | def changeOutputWeightInd(self, old_ind, new_ind): 111 | weight = self.output_weights.pop(old_ind) 112 | self.output_weights[new_ind] = weight 113 | 114 | def addToOutputWeights(self, new_output_ind, val=None, std=0.1): 115 | if val is not None: 116 | self.output_weights[new_output_ind] = val 117 | else: 118 | self.output_weights[new_output_ind] = np.random.normal(scale=std) 119 | 120 | 121 | def mutateOutputWeight(self, ind, std=0.1): 122 | self.output_weights[ind] += np.random.normal(scale=std) 123 | 124 | 125 | def getOutputIndices(self): 126 | return(list(self.output_weights.keys())) 127 | 128 | 129 | def getNInputs(self): 130 | return(len(self.input_indices)) 131 | 132 | def getNOutputs(self): 133 | return(len(self.output_weights)) 134 | 135 | 136 | def getOutputWeightStr(self): 137 | w_str = ', '.join(['{}: {:.3f}'.format(k,v) for k,v in self.output_weights.items()]) 138 | s = '[{}]'.format(w_str) 139 | return(s) 140 | 141 | def setOutputIndices(self, ind_list): 142 | self.output_weights = dict(zip(copy(ind_list), [0]*len(ind_list))) 143 | 144 | 145 | def setInputIndices(self, ind_list): 146 | self.input_indices = copy(ind_list) 147 | self.clearInputs() 148 | 149 | 150 | def allInputsReceived(self): 151 | 152 | #if self.input_indices is None: 153 | if len(self.input_indices) == 0: 154 | return(True) 155 | 156 | # checks if there are any None's left in the list. If there aren't, it has all inputs 157 | # and is ready to proceed. 158 | if list(self.inputs_received.values()).count(None)==0: 159 | return(True) 160 | else: 161 | return(False) 162 | 163 | 164 | 165 | 166 | def addToInputsReceived(self, val): 167 | self.inputs_received.append(val) 168 | 169 | 170 | def nonlinear(self, x): 171 | 172 | # Let's start with a nice simple sigmoid. 
173 | 174 | #sigmoid = 1/(1 + exp(-x)) 175 | #relu = max(0, x) 176 | tanh_x = tanh(x) 177 | 178 | return(tanh_x) 179 | 180 | 181 | # 182 | -------------------------------------------------------------------------------- /scrap_old_test/PuckworldAgent.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from math import sqrt 4 | 5 | 6 | 7 | class PuckworldAgent: 8 | 9 | 10 | def __init__(self, **kwargs): 11 | 12 | self.xlims = kwargs.get('xlims', np.array([-0.5,0.5])) 13 | self.ylims = kwargs.get('ylims', np.array([-0.5,0.5])) 14 | self.lims = np.array((self.xlims,self.ylims)) 15 | self.max_dist = sqrt(np.ptp(self.xlims)**2 + np.ptp(self.ylims)**2) 16 | self.a = kwargs.get('a',1.0) 17 | self.drag = kwargs.get('drag', 0.5) 18 | self.time_step = kwargs.get('dt',10**-1) 19 | self.reward_type = kwargs.get('reward','sparse') 20 | 21 | self.passed_params = {} 22 | check_params = ['a', 'drag', 'dt', 'reward'] 23 | for param in check_params: 24 | if kwargs.get(param, None) is not None: 25 | self.passed_params[param] = kwargs.get(param, None) 26 | 27 | self.N_actions = 4 28 | 29 | 30 | self.circ_rad = np.ptp(self.xlims)/20.0 31 | self.target_rad = 1*self.circ_rad 32 | self.resetTarget() 33 | 34 | self.pos0 = np.array([self.xlims.mean()/2.0,self.ylims.mean()/2.0]) 35 | self.v0 = np.array([0.0,0.0]) 36 | self.resetStateValues() 37 | self.accel_array = np.array([[0,1],[0,-1],[-1,0],[1,0]]) 38 | 39 | 40 | 41 | self.N_state_terms = len(self.getStateVec()) 42 | 43 | 44 | 45 | def puckTargetDist(self): 46 | return(sqrt(np.sum((self.pos-self.target)**2))) 47 | 48 | 49 | def addToHist(self): 50 | self.pos_hist = np.concatenate((self.pos_hist,[self.pos])) 51 | self.v_hist = np.concatenate((self.v_hist,[self.v])) 52 | self.t.append(self.t[-1] + self.time_step) 53 | self.r_hist.append(self.reward()) 54 | 55 | 56 | def resetTarget(self): 57 | 58 | self.target = self.target_rad + self.lims[:,0] + np.random.random((2,))*(np.ptp(self.lims,axis=1)-2*self.target_rad) 59 | 60 | 61 | def iterateEuler(self,action): 62 | 63 | #this uses the Euler-Cromer method to move. 64 | 65 | #Right now I'm just gonna make it sit against a wall if it goes to the 66 | #boundary, but it might be cool to make periodic bry conds, to see if it would 67 | #learn to zoom around it. 68 | 69 | a = self.actionToAccel(action) - self.drag*self.v 70 | 71 | v_next = self.v + a*self.time_step 72 | pos_next = self.pos + v_next*self.time_step 73 | 74 | #To handle the walls 75 | for i in [0,1]: 76 | if pos_next[i] < (self.lims[i,0] + self.circ_rad): 77 | pos_next[i] = self.lims[i,0] + self.circ_rad 78 | # This makes it "bounce" off the wall, so it keeps momentum. 79 | #v_next[i] = -v_next[i] 80 | # This makes it "stick" to the wall. 81 | v_next[i] = 0 82 | 83 | if pos_next[i] > (self.lims[i,1] - self.circ_rad): 84 | pos_next[i] = self.lims[i,1] - self.circ_rad 85 | #v_next[i] = -v_next[i] 86 | v_next[i] = 0 87 | 88 | self.pos = pos_next 89 | self.v = v_next 90 | self.addToHist() 91 | 92 | 93 | def actionToAccel(self,action): 94 | self.a_hist.append(action) 95 | return(self.a*self.accel_array[action]) 96 | 97 | 98 | 99 | ###################### Required agent functions 100 | 101 | 102 | def getPassedParams(self): 103 | #This returns a dict of params that were passed to the agent, that apply to the agent. 104 | #So if you pass it a param for 'reward', it will return that, but it won't return the 105 | #default val if you didn't pass it. 
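#e.g. PuckworldAgent(reward='shaped') -> getPassedParams() gives {'reward': 'shaped'};
#a default like drag=0.5 that wasn't passed won't appear.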
106 | return(self.passed_params) 107 | 108 | 109 | def getStateVec(self): 110 | assert self.target is not None, 'Need target to get state vec' 111 | return(np.concatenate((self.pos,self.v,self.target))) 112 | 113 | 114 | def reward(self): 115 | 116 | assert self.target is not None, 'Need a target' 117 | 118 | max_R = 1 119 | 120 | if self.reward_type == 'sparse': 121 | if self.puckTargetDist() <= (self.target_rad + self.circ_rad): 122 | return(max_R) 123 | else: 124 | return(-0.01) 125 | 126 | if self.reward_type == 'shaped': 127 | #return(max_R*(self.max_dist/2.0 - self.puckTargetDist())) 128 | #These numbers will probably have to change if a, dt, or the dimensions change. 129 | return(-0.5*self.puckTargetDist() + 0.4) 130 | 131 | 132 | def initEpisode(self): 133 | self.resetStateValues() 134 | self.resetTarget() 135 | 136 | 137 | def iterate(self,action): 138 | self.iterateEuler(action) 139 | 140 | r = self.reward() 141 | if r > 0: 142 | self.resetTarget() 143 | 144 | return(r, self.getStateVec(), False) 145 | 146 | 147 | def resetStateValues(self): 148 | 149 | self.pos = self.pos0 150 | self.v = self.v0 151 | 152 | self.pos_hist = np.array([self.pos]) 153 | self.v_hist = np.array([self.v]) 154 | self.t = [0] 155 | self.a_hist = [0] 156 | self.r_hist = [] 157 | 158 | 159 | def drawState(self,ax): 160 | 161 | ax.clear() 162 | ax.set_xlim(tuple(self.xlims)) 163 | ax.set_ylim(tuple(self.ylims)) 164 | 165 | ax.set_xlabel('x') 166 | ax.set_ylabel('y') 167 | ax.set_aspect('equal') 168 | 169 | puck = plt.Circle(tuple(self.pos), self.circ_rad, color='tomato') 170 | ax.add_artist(puck) 171 | 172 | if self.target is not None: 173 | target = plt.Circle(tuple(self.target), self.target_rad, color='seagreen') 174 | ax.add_artist(target) 175 | 176 | 177 | def plotStateParams(self,axes): 178 | 179 | ax1 = axes[0] 180 | ax2 = axes[1] 181 | ax3 = axes[2] 182 | ax4 = axes[3] 183 | 184 | ax1.clear() 185 | ax1.plot(self.pos_hist[:,0][-1000:],label='x') 186 | ax1.plot(self.pos_hist[:,1][-1000:],label='y') 187 | ax1.legend() 188 | 189 | ax2.clear() 190 | ax2.plot(self.a_hist[-1000:],label='a') 191 | ax2.set_yticks([0,1,2,3]) 192 | ax2.set_yticklabels(['U','D','L','R']) 193 | ax2.legend() 194 | 195 | 196 | ax3.clear() 197 | ax3.plot(self.r_hist[-1000:],label='R') 198 | ax3.legend() 199 | 200 | 201 | ax4.clear() 202 | ax4.plot(self.v_hist[:,0][-1000:],label='vx') 203 | ax4.plot(self.v_hist[:,1][-1000:],label='vy') 204 | ax4.legend() 205 | 206 | 207 | 208 | 209 | # 210 | -------------------------------------------------------------------------------- /classes/FileSystemTools.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from os import mkdir 3 | import os 4 | from copy import copy,deepcopy 5 | import time 6 | import glob 7 | import subprocess 8 | 9 | def getDateString(): 10 | return(datetime.now().strftime('%d-%m-%Y_%H-%M-%S')) 11 | 12 | 13 | def makeDir(dir_name): 14 | # Even if this is in a library dir, it should make the dir 15 | # in the script that called it. 
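# e.g., makeDir('misc_runs/ablation_test') creates that directory and returns 'misc_runs/ablation_test'.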
16 | mkdir(dir_name) 17 | return(dir_name) 18 | 19 | 20 | def makeDateDir(base_dir='.'): 21 | # Just creates a dir with the current date for its name 22 | ds = getDateString() 23 | full_dir = combineDirAndFile(base_dir, ds) 24 | makeDir(full_dir) 25 | return(full_dir) 26 | 27 | 28 | def makeLabelDateDir(label, base_dir='.'): 29 | # You give it a label, and it creates the dir label_datestring 30 | dir_name = label + '_' + getDateString() 31 | full_dir = combineDirAndFile(base_dir, dir_name) 32 | makeDir(full_dir) 33 | return(full_dir) 34 | 35 | 36 | def combineDirAndFile(dir, file): 37 | # Adds the file to the end of dir, adding a slash in between if needed. 38 | return(addTrailingSlashIfNeeded(dir) + file) 39 | 40 | 41 | def dictPrettyPrint(in_dict): 42 | 43 | # Formats a dict into a nice string with each k,v entry on a new line, 44 | # and prints it. 45 | dict_str = '{\n' 46 | 47 | for k,v in in_dict.items(): 48 | dict_str += '\t{} : {}\n'.format(k, v) 49 | 50 | dict_str += '\n}\n' 51 | print(dict_str) 52 | 53 | 54 | 55 | 56 | def dictToStringList(dict): 57 | pd_copy = copy(dict) 58 | for k,v in pd_copy.items(): 59 | if type(v).__name__ == 'float': 60 | if abs(v)>10**-4: 61 | pd_copy[k] = '{:.5f}'.format(v) 62 | else: 63 | pd_copy[k] = '{:.2E}'.format(v) 64 | 65 | params = [str(k)+'='+str(v) for k,v in pd_copy.items() if v is not None] 66 | return(params) 67 | 68 | 69 | 70 | def paramDictToFnameStr(param_dict): 71 | # Creates a string that can be used as an fname, separated by 72 | # underscores. If a param has the value None, it isn't included. 73 | params = dictToStringList(param_dict) 74 | return('_'.join(params)) 75 | 76 | def paramDictToLabelStr(param_dict): 77 | # Creates a string that can be used as an fname, separated by 78 | # ', '. If a param has the value None, it isn't included. 79 | params = dictToStringList(param_dict) 80 | return(', '.join(params)) 81 | 82 | 83 | def listToFname(list): 84 | return('_'.join(list)) 85 | 86 | 87 | def parseSingleAndListParams(param_dict, exclude_list): 88 | 89 | # This is useful for if you want to do multiple runs, varying one or 90 | # several parameters at once. exclude_list are ones you don't want to 91 | # include in the parameters in the tuple. 92 | 93 | # It returns a list of the parameters that are varied, 94 | # and a list of dictionaries that can be directly passed to a function, where 95 | # each one has a different set of the varied params. 96 | # 97 | # You should pass the args where if you don't want to vary an arg, it's just normal 98 | # my_arg = 5, but if you do want to vary it, you pass it a list of the vary values, like 99 | # my_arg = [1, 5, 8]. If you want to vary two at the same time, you pass them both as separate 100 | # lists, and it will match them up, but they need to be the same size. 101 | 102 | # list_params is just a list of the params that were passed as a list, that we'll vary. 103 | list_params = [] 104 | # single_params is a dict of the params that aren't varied and will have the same vals in each 105 | # separate run. 106 | single_params = {} 107 | # ziplist is a list of the lists for the params that are varied. So if there are two varied 108 | # args, each length 3, it will take these, and then below zip them to create a list of pairs. 
109 | # arg1=[1,2,3], arg2=[2,4,8] -> ziplist=[arg1,arg2] -> param_tups=[(1,2),(2,4),(3,8)] 110 | ziplist = [] 111 | 112 | 113 | for k,v in param_dict.items(): 114 | if type(v).__name__ == 'list': 115 | list_params.append(k) 116 | ziplist.append(v) 117 | else: 118 | if k not in exclude_list: 119 | single_params[k] = v 120 | 121 | param_tups = list(zip(*ziplist)) 122 | 123 | vary_param_dicts = [] 124 | vary_param_tups = [] 125 | for tup in param_tups: 126 | temp_dict = dict(zip(list_params,tup)) 127 | temp_kw = {**single_params, **temp_dict} 128 | vary_param_tups.append(temp_dict) 129 | vary_param_dicts.append(temp_kw) 130 | 131 | # list_params: just a list of the names of the varied ones. 132 | # vary_param_dicts: a list of the dicts that you can pass to each iteration, which includes the args that don't vary. 133 | # vary_param_tups: a list of dicts corresponding to vary_param_dicts, of only the values that change. 134 | return(list_params, vary_param_dicts, vary_param_tups) 135 | 136 | 137 | 138 | def strfdelta(tdelta, fmt): 139 | d = {"days": tdelta.days} 140 | d["hours"], rem = divmod(tdelta.seconds, 3600) 141 | d["minutes"], d["seconds"] = divmod(rem, 60) 142 | return fmt.format(**d) 143 | 144 | 145 | def getCurTimeObj(): 146 | return(datetime.now()) 147 | 148 | 149 | def getTimeDiffNum(start_time_obj): 150 | 151 | diff = datetime.timestamp(datetime.now()) - datetime.timestamp(start_time_obj) 152 | return(diff) 153 | 154 | 155 | def getTimeDiffObj(start_time_obj): 156 | #Gets the time diff in a nice format from the start_time_obj. 157 | diff = datetime.now() - start_time_obj 158 | return(diff) 159 | 160 | 161 | def getTimeDiffStr(start_time_obj): 162 | #Gets the time diff in a nice format from the start_time_obj. 163 | diff = getTimeDiffObj(start_time_obj) 164 | 165 | return(strfdelta(diff,'{hours} hrs, {minutes} mins, {seconds} s')) 166 | 167 | 168 | def writeDictToFile(dict, fname): 169 | # You have to copy it here, otherwise it'll actually overwrite the values in the dict 170 | # you passed. 171 | my_dict = copy(dict) 172 | f = open(fname,'w+') 173 | for k,v in my_dict.items(): 174 | if type(v).__name__ == 'float': 175 | if abs(v)>10**-4: 176 | my_dict[k] = '{:.5f}'.format(v) 177 | else: 178 | my_dict[k] = '{:.2E}'.format(v) 179 | f.write('{} = {}\n'.format(k, my_dict[k])) 180 | 181 | f.close() 182 | 183 | 184 | def readFileToDict(fname): 185 | d = {} 186 | with open(fname) as f: 187 | for line in f: 188 | (key, val) = line.split(' = ') 189 | val = val.strip('\n') 190 | #This is to handle the fact that everything gets read in 191 | #as a string, but some stuff you probably want to be floats. 192 | try: 193 | val = float(val) 194 | except: 195 | val = str(val) 196 | 197 | d[key] = val 198 | 199 | 200 | return(d) 201 | 202 | 203 | def dirFromFullPath(fname): 204 | # This gives you the path, stripping the local filename, if you pass it 205 | # a long path + filename. 206 | parts = fname.split('/') 207 | last_part = parts[-1] 208 | path = fname.replace(last_part,'') 209 | if path == '': 210 | return('./') 211 | else: 212 | return(path) 213 | 214 | 215 | def fnameFromFullPath(fname): 216 | # This just gets the local filename if you passed it some huge long name with the path. 
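# e.g., fnameFromFullPath('/home/declan/runs/plot.png') -> 'plot.png'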
217 | parts = fname.split('/') 218 | last_part = parts[-1] 219 | return(last_part) 220 | 221 | def stripAnyTrailingSlash(path): 222 | if path[-1] == '/': 223 | return(path[:-1]) 224 | else: 225 | return(path) 226 | 227 | 228 | def addTrailingSlashIfNeeded(path): 229 | if path[-1] == '/': 230 | return(path) 231 | else: 232 | return(path + '/') 233 | 234 | 235 | 236 | 237 | 238 | def gifFromImages(imgs_path, gif_name, ext = '.png', delay=50): 239 | 240 | 241 | imgs_path = stripAnyTrailingSlash(imgs_path) 242 | file_list = glob.glob(imgs_path + '/' + '*' + ext) # Get all the pngs in the current directory 243 | #print(file_list) 244 | #print([fnameFromFullPath(x).split('.png')[0] for x in file_list]) 245 | #list.sort(file_list, key=lambda x: int(x.split('_')[1].split('.png')[0])) 246 | list.sort(file_list, key=lambda x: int(fnameFromFullPath(x).split(ext)[0])) 247 | #list.sort(file_list) # Sort the images by #, this may need to be tweaked for your use case 248 | #print(file_list) 249 | assert len(file_list) < 300, 'Too many files ({}), will probably crash convert command.'.format(len(file_list)) 250 | 251 | output_fname = '{}/{}.gif'.format(imgs_path, gif_name) 252 | 253 | check_call_arglist = ['convert'] + ['-delay', str(delay)] + file_list + [output_fname] 254 | #print(check_call_arglist) 255 | print('Calling convert command to create gif...') 256 | subprocess.check_call(check_call_arglist) 257 | print('done.') 258 | return(output_fname) 259 | # older method: 260 | 261 | '''with open('image_list.txt', 'w') as file: 262 | for item in file_list: 263 | file.write("%s\n" % item) 264 | 265 | os.system('convert @image_list.txt {}/{}.gif'.format(imgs_path,gif_name)) # On windows convert is 'magick' 266 | ''' 267 | 268 | # 269 | -------------------------------------------------------------------------------- /scrap_old_test/agent1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from math import sqrt 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.distributions import Categorical 9 | from copy import deepcopy 10 | 11 | 12 | class DQN(nn.Module): 13 | 14 | def __init__(self, D_in, H, D_out, NL_fn=torch.tanh, softmax=False): 15 | super(DQN, self).__init__() 16 | 17 | self.lin1 = nn.Linear(D_in,H) 18 | self.lin2 = nn.Linear(H,D_out) 19 | self.NL_fn = NL_fn 20 | self.softmax = softmax 21 | 22 | def forward(self, x): 23 | x = self.lin1(x) 24 | #x = F.relu(x) 25 | #x = torch.tanh(x) 26 | x = self.NL_fn(x) 27 | x = self.lin2(x) 28 | if self.softmax: 29 | x = torch.softmax(x,dim=1) 30 | return(x) 31 | 32 | 33 | 34 | class agent1: 35 | 36 | 37 | def __init__(self, **kwargs): 38 | 39 | self.xlims = kwargs.get('xlims', np.array([-0.5,0.5])) 40 | self.ylims = kwargs.get('ylims', np.array([-0.5,0.5])) 41 | self.lims = np.array((self.xlims,self.ylims)) 42 | self.max_dist = sqrt(np.ptp(self.xlims)**2 + np.ptp(self.ylims)**2) 43 | self.a = kwargs.get('a',1.0) 44 | self.drag = kwargs.get('drag', 0.5) 45 | self.time_step = kwargs.get('dt',10**-1) 46 | self.reward_type = kwargs.get('reward','sparse') 47 | 48 | self.passed_params = {} 49 | check_params = ['a', 'drag', 'dt', 'reward'] 50 | for param in check_params: 51 | if kwargs.get(param, None) is not None: 52 | self.passed_params[param] = kwargs.get(param, None) 53 | 54 | self.N_actions = 4 55 | 56 | 57 | self.circ_rad = np.ptp(self.xlims)/20.0 58 | self.target_rad = 1*self.circ_rad 59 | 
self.resetTarget() 60 | 61 | self.pos0 = np.array([self.xlims.mean()/2.0,self.ylims.mean()/2.0]) 62 | self.v0 = np.array([0.0,0.0]) 63 | self.resetStateValues() 64 | self.accel_array = np.array([[0,1],[0,-1],[-1,0],[1,0]]) 65 | 66 | 67 | 68 | self.N_state_terms = len(self.getStateVec()) 69 | 70 | self.HLN = 20 71 | 72 | self.dtype = torch.float32 73 | torch.set_default_dtype(self.dtype) 74 | 75 | # I think it's already randomly initializing the weights with a gaussian mean=0, std=1 76 | self.policy_NN = DQN(self.N_state_terms, self.HLN, self.N_actions, softmax=True) 77 | 78 | '''for p in self.policy_NN.parameters(): 79 | print(p.data)''' 80 | self.N_weight_tensors = len(list(self.policy_NN.parameters())) 81 | 82 | 83 | self.N_mate_swaps = 18 84 | self.N_mutations = 2 85 | 86 | 87 | 88 | def puckTargetDist(self): 89 | return(sqrt(np.sum((self.pos-self.target)**2))) 90 | 91 | 92 | def addToHist(self): 93 | self.pos_hist = np.concatenate((self.pos_hist,[self.pos])) 94 | self.v_hist = np.concatenate((self.v_hist,[self.v])) 95 | self.t.append(self.t[-1] + self.time_step) 96 | self.r_hist.append(self.reward()) 97 | 98 | 99 | def resetTarget(self): 100 | 101 | self.target = self.target_rad + self.lims[:,0] + np.random.random((2,))*(np.ptp(self.lims,axis=1)-2*self.target_rad) 102 | 103 | 104 | def iterateEuler(self,action): 105 | 106 | #this uses the Euler-Cromer method to move. 107 | 108 | #Right now I'm just gonna make it sit against a wall if it goes to the 109 | #boundary, but it might be cool to make periodic bry conds, to see if it would 110 | #learn to zoom around it. 111 | 112 | a = self.actionToAccel(action) - self.drag*self.v 113 | 114 | v_next = self.v + a*self.time_step 115 | pos_next = self.pos + v_next*self.time_step 116 | 117 | #To handle the walls 118 | for i in [0,1]: 119 | if pos_next[i] < (self.lims[i,0] + self.circ_rad): 120 | pos_next[i] = self.lims[i,0] + self.circ_rad 121 | # This makes it "bounce" off the wall, so it keeps momentum. 122 | v_next[i] = -v_next[i] 123 | # This makes it "stick" to the wall. 
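                # (i.e. uncommenting the line below, and dropping the bounce above,
                # would zero the wall-normal velocity component instead of reflecting it)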
124 | #v_next[i] = 0 125 | 126 | if pos_next[i] > (self.lims[i,1] - self.circ_rad): 127 | pos_next[i] = self.lims[i,1] - self.circ_rad 128 | v_next[i] = -v_next[i] 129 | #v_next[i] = 0 130 | 131 | self.pos = pos_next 132 | self.v = v_next 133 | self.addToHist() 134 | 135 | 136 | def actionToAccel(self,action): 137 | self.a_hist.append(action) 138 | return(self.a*self.accel_array[action]) 139 | 140 | 141 | def softmaxAction(self, state_vec): 142 | pi_vals = self.policy_NN(state_vec) 143 | m = Categorical(pi_vals) 144 | return(m.sample()) 145 | 146 | ###################### Required agent functions 147 | 148 | 149 | def mate(self, other_agent): 150 | 151 | ag1 = deepcopy(self) 152 | ag2 = deepcopy(other_agent) 153 | 154 | lin1_weight_shape = ag1.policy_NN.lin1.weight.data.shape 155 | lin1_bias_shape = ag1.policy_NN.lin1.bias.data.shape 156 | lin2_weight_shape = ag1.policy_NN.lin2.weight.data.shape 157 | lin2_bias_shape = ag1.policy_NN.lin2.bias.data.shape 158 | 159 | 160 | for i in range(self.N_mate_swaps): 161 | 162 | r1 = np.random.randint(0, lin1_weight_shape[0]) 163 | r2 = np.random.randint(0, lin1_weight_shape[1]) 164 | ag1.policy_NN.lin1.weight.data[r1,r2], ag2.policy_NN.lin1.weight.data[r1,r2] = ag2.policy_NN.lin1.weight.data[r1,r2], ag1.policy_NN.lin1.weight.data[r1,r2] 165 | 166 | r1 = np.random.randint(0, lin2_weight_shape[0]) 167 | r2 = np.random.randint(0, lin2_weight_shape[1]) 168 | ag1.policy_NN.lin2.weight.data[r1,r2], ag2.policy_NN.lin2.weight.data[r1,r2] = ag2.policy_NN.lin2.weight.data[r1,r2], ag1.policy_NN.lin2.weight.data[r1,r2] 169 | 170 | r1 = np.random.randint(0, lin1_weight_shape[0]) 171 | ag1.policy_NN.lin1.bias.data[r1], ag2.policy_NN.lin1.bias.data[r1] = ag2.policy_NN.lin1.bias.data[r1], ag1.policy_NN.lin1.bias.data[r1] 172 | 173 | r1 = np.random.randint(0, lin2_weight_shape[0]) 174 | ag1.policy_NN.lin2.bias.data[r1], ag2.policy_NN.lin2.bias.data[r1] = ag2.policy_NN.lin2.bias.data[r1], ag1.policy_NN.lin2.bias.data[r1] 175 | 176 | return(ag1, ag2) 177 | 178 | 179 | def isSameState(self, other_agent): 180 | return(False) 181 | 182 | 183 | def mutate(self): 184 | 185 | lin1_weight_shape = self.policy_NN.lin1.weight.data.shape 186 | lin1_bias_shape = self.policy_NN.lin1.bias.data.shape 187 | lin2_weight_shape = self.policy_NN.lin2.weight.data.shape 188 | lin2_bias_shape = self.policy_NN.lin2.bias.data.shape 189 | 190 | for i in range(self.N_mutations): 191 | 192 | r1 = np.random.randint(0, lin1_weight_shape[0]) 193 | r2 = np.random.randint(0, lin1_weight_shape[1]) 194 | self.policy_NN.lin1.weight.data[r1,r2] = np.random.randn() 195 | 196 | r1 = np.random.randint(0, lin2_weight_shape[0]) 197 | r2 = np.random.randint(0, lin2_weight_shape[1]) 198 | self.policy_NN.lin2.weight.data[r1,r2] = np.random.randn() 199 | 200 | r1 = np.random.randint(0, lin1_weight_shape[0]) 201 | self.policy_NN.lin1.bias.data[r1] = np.random.randn() 202 | 203 | r1 = np.random.randint(0, lin2_weight_shape[0]) 204 | self.policy_NN.lin2.bias.data[r1] = np.random.randn() 205 | 206 | 207 | 208 | def fitnessFunction(self): 209 | self.fixedLengthEpisode(100) 210 | # I think the fitness function is meant to be minimized, so we should pass it 211 | # the negative of the total reward. 
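        # For example, if the minimized convention holds, an episode with a total
        # accumulated reward of +12.5 yields a fitness of -12.5 (lower = better agent).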
212 | return(-sum(self.r_hist)) 213 | 214 | 215 | 216 | def fixedLengthEpisode(self, N_steps): 217 | self.resetTarget() 218 | self.resetStateValues() 219 | 220 | for i in range(N_steps): 221 | s = torch.tensor(self.getStateVec(), dtype=torch.float32).unsqueeze(dim=0) 222 | a = self.softmaxAction(s) 223 | r, s_next = self.iterate(a) 224 | 225 | 226 | def getPassedParams(self): 227 | #This returns a dict of params that were passed to the agent, that apply to the agent. 228 | #So if you pass it a param for 'reward', it will return that, but it won't return the 229 | #default val if you didn't pass it. 230 | return(self.passed_params) 231 | 232 | 233 | def getStateVec(self): 234 | assert self.target is not None, 'Need target to get state vec' 235 | return(np.concatenate((self.pos,self.v,self.target))) 236 | 237 | 238 | def getState(self): 239 | return(self.getStateVec()) 240 | 241 | def printState(self): 242 | print(self.getState()) 243 | 244 | def reward(self): 245 | 246 | assert self.target is not None, 'Need a target' 247 | 248 | max_R = 1 249 | 250 | if self.reward_type == 'sparse': 251 | if self.puckTargetDist() <= (self.target_rad + self.circ_rad): 252 | return(max_R) 253 | else: 254 | return(-0.01) 255 | 256 | if self.reward_type == 'shaped': 257 | #return(max_R*(self.max_dist/2.0 - self.puckTargetDist())) 258 | #These numbers will probably have to change if a, dt, or the dimensions change. 259 | return(-0.5*self.puckTargetDist() + 0.4) 260 | 261 | 262 | def initEpisode(self): 263 | self.resetStateValues() 264 | self.resetTarget() 265 | 266 | 267 | def iterate(self,action): 268 | self.iterateEuler(action) 269 | 270 | r = self.reward() 271 | if r > 0: 272 | self.resetTarget() 273 | 274 | return(r,self.getStateVec()) 275 | 276 | 277 | def resetStateValues(self): 278 | 279 | self.pos = self.pos0 280 | self.v = self.v0 281 | 282 | self.pos_hist = np.array([self.pos]) 283 | self.v_hist = np.array([self.v]) 284 | self.t = [0] 285 | self.a_hist = [0] 286 | self.r_hist = [] 287 | 288 | 289 | def drawState(self, ax): 290 | 291 | ax.clear() 292 | ax.set_xlim(tuple(self.xlims)) 293 | ax.set_ylim(tuple(self.ylims)) 294 | 295 | ax.set_xlabel('x') 296 | ax.set_ylabel('y') 297 | ax.set_aspect('equal') 298 | 299 | puck = plt.Circle(tuple(self.pos), self.circ_rad, color='tomato') 300 | ax.add_artist(puck) 301 | 302 | if self.target is not None: 303 | target = plt.Circle(tuple(self.target), self.target_rad, color='seagreen') 304 | ax.add_artist(target) 305 | 306 | 307 | def plotStateParams(self,axes): 308 | 309 | ax1 = axes[0] 310 | ax2 = axes[1] 311 | ax3 = axes[2] 312 | ax4 = axes[3] 313 | 314 | ax1.clear() 315 | ax1.plot(self.pos_hist[:,0][-1000:],label='x') 316 | ax1.plot(self.pos_hist[:,1][-1000:],label='y') 317 | ax1.legend() 318 | 319 | ax2.clear() 320 | ax2.plot(self.a_hist[-1000:],label='a') 321 | ax2.set_yticks([0,1,2,3]) 322 | ax2.set_yticklabels(['U','D','L','R']) 323 | ax2.legend() 324 | 325 | 326 | ax3.clear() 327 | ax3.plot(self.r_hist[-1000:],label='R') 328 | ax3.legend() 329 | 330 | 331 | ax4.clear() 332 | ax4.plot(self.v_hist[:,0][-1000:],label='vx') 333 | ax4.plot(self.v_hist[:,1][-1000:],label='vy') 334 | ax4.legend() 335 | 336 | 337 | 338 | 339 | # 340 | -------------------------------------------------------------------------------- /classes/PopTests.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./classes') 3 | import Population 4 | import matplotlib.pyplot as plt 5 | from statistics import mean,stdev 6 | 
import FileSystemTools as fst
7 | from time import time
8 | import numpy as np
9 | import os
10 | import glob
11 | from math import sqrt, ceil
12 | import subprocess
13 |
14 |
15 |
16 | def varyParam(**kwargs):
17 |
18 |     st = fst.getCurTimeObj()
19 |
20 |     date_time = fst.getDateString()
21 |     notes = kwargs.get('notes', '')
22 |     N_runs = kwargs.get('N_runs', 1)
23 |     show_plot = kwargs.get('show_plot', False)
24 |
25 |     exclude_list = ['notes', 'N_runs', 'show_plot']
26 |     vary_params, vary_param_dict_list, vary_param_tups = fst.parseSingleAndListParams(kwargs, exclude_list)
27 |
28 |     label = 'vary_' + fst.listToFname(vary_params) + '_' + notes
29 |     dir = fst.makeLabelDateDir(label)
30 |     print('Saving vary param results to: ', dir)
31 |     img_ext = '.png'
32 |     base_fname = fst.combineDirAndFile(dir, label + date_time)
33 |     img_fname = base_fname + img_ext
34 |
35 |     log_fname = base_fname + '_log.txt'
36 |     fst.writeDictToFile(kwargs, log_fname)
37 |
38 |     # Set the SD for each entry to 0. If there's only 1 run each, that's fine. If
39 |     # there are several, it will replace the 0's.
40 |     R_tots = []
41 |     SD = [0]*len(vary_param_dict_list)
42 |
43 |     for i, kws in enumerate(vary_param_dict_list):
44 |
45 |         print('\n{}\n'.format(vary_param_tups[i]))
46 |         results = []
47 |         for j in range(N_runs):
48 |             print('run ', j)
49 |
50 |             p1 = Population.Population(**kws, base_dir=dir, fname_notes=fst.paramDictToFnameStr(vary_param_tups[i])) # Population reads 'base_dir'; a bare 'dir' kwarg would be silently ignored.
51 |
52 |             r_tot = p1.evolve(**kws)['best_individ_avg_score'] # evolve() returns a dict (see Population.evolve()), not a tuple.
53 |
54 |             results.append(r_tot)
55 |
56 |         R_tots.append(mean(results))
57 |         if N_runs > 1:
58 |             SD[i] = stdev(results)
59 |
60 |
61 |     plt.close('all')
62 |     fig, axes = plt.subplots(1, 1, figsize=(6,9))
63 |
64 |     plt.errorbar(list(range(len(R_tots))), R_tots, yerr=SD, fmt='ro-')
65 |
66 |     axes.set_xticks(list(range(len(R_tots))))
67 |     x_tick_labels = ['\n'.join(fst.dictToStringList(param)) for param in vary_param_tups]
68 |     axes.set_xticklabels(x_tick_labels, rotation='vertical')
69 |     axes.set_ylabel('Total reward')
70 |     plt.tight_layout()
71 |     plt.savefig(img_fname)
72 |
73 |     vary_param_labels = [','.join(fst.dictToStringList(param)) for param in vary_param_tups]
74 |     f = open(base_fname + '_values.txt', 'w+')
75 |     for label, val, sd in zip(vary_param_labels, R_tots, SD):
76 |         f.write('{}\t{}\t{}\n'.format(label, val, sd))
77 |     f.close()
78 |
79 |     print('\n\ntook {} to execute'.format(fst.getTimeDiffStr(st)))
80 |
81 |     plotRewardCurvesByVaryParam(dir, searchlabel='bestscore')
82 |     plotRewardCurvesByVaryParam(dir, searchlabel='meanscore')
83 |
84 |     if show_plot:
85 |         plt.show()
86 |
87 |
88 |
89 |
90 | def plotRewardCurvesByVaryParam(dir, searchlabel, **kwargs):
91 |
92 |     # Use the "values" file from now on to get the vary_param values.
93 |     # searchlabel is the prefix of the filenames we search for, so the same
94 |     # routine works for multiple quantities (e.g. 'bestscore' and 'meanscore').
95 |
96 |     # Find the values file
97 |     val_file_list = glob.glob(fst.addTrailingSlashIfNeeded(dir) + 'vary_' + '*' + 'values.txt')
98 |
99 |     assert len(val_file_list)==1, 'there needs to be exactly one values.txt file.'
100 |
101 |     vals_file = val_file_list[0]
102 |
103 |     # Read in each line, corresponding to each vary params tuple
104 |     with open(vals_file, 'r') as f:
105 |         vary_param_vals = f.read().split('\n')
106 |
107 |     # They're tab-separated, so split and grab the first column (the label).
108 |     vary_param_vals = [x.split('\t')[0] for x in vary_param_vals if x != '']
109 |     # Expects the vary params to be separated by underscores -- not ideal.
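    # e.g. (assuming dictToStringList() yields 'key=value' strings, as the labels written
    # above suggest) a values.txt row labeled 'N_pop=10,std=0.1' becomes 'N_pop=10_std=0.1'
    # so it can be matched against the saved filenames.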
110 | vary_param_vals = [x.replace(',','_') for x in vary_param_vals] 111 | # Get the files that contain this series of vary vals... 112 | vary_param_files = [glob.glob(fst.addTrailingSlashIfNeeded(dir) + searchlabel + '*' + val + '*' + '.txt') for val in vary_param_vals] 113 | 114 | 115 | 116 | fig, ax = plt.subplots(1, 1, figsize=(10,8)) 117 | 118 | line_cols = ['darkred', 'mediumblue', 'darkgreen', 'goldenrod', 'purple', 'darkorange', 'black'] 119 | shade_cols = ['tomato', 'dodgerblue', 'lightgreen', 'khaki', 'plum', 'peachpuff', 'lightgray'] 120 | max_total = -1000 121 | min_total = 1000 122 | N_stds = 2 123 | N_skip = 0 124 | 125 | # This is a really hacky way of lining up curves that are shifted. You pass it a list of 126 | # how each curve (the avg) will be scaled and how each will be offset. If you don't pass it 127 | # anything, it won't do anything differently. 128 | scale_factors = kwargs.get('scale_factors', np.ones(len(vary_param_vals))) 129 | offsets = kwargs.get('offsets', np.zeros(len(vary_param_vals))) 130 | 131 | # For doing the scale and offset thing, and making sure the ranges are right. 132 | print('vary_param_vals', vary_param_vals) 133 | for i, (val, file_group) in enumerate(zip(vary_param_vals, vary_param_files)): 134 | dat_array = np.array([np.loadtxt(fname) for fname in file_group]) 135 | avg = np.mean(dat_array, axis=0)*scale_factors[i] + offsets[i] 136 | std = np.std(dat_array, axis=0)*scale_factors[i] 137 | if max((avg + N_stds*std)[N_skip:]) > max_total: 138 | max_total = max((avg + N_stds*std)[N_skip:]) 139 | if min((avg - N_stds*std)[N_skip:]) < min_total: 140 | min_total = min((avg - N_stds*std)[N_skip:]) 141 | plt.plot(avg, color=line_cols[i], label=val) 142 | plt.fill_between(np.array(range(len(avg))), avg - std, avg + std, facecolor=shade_cols[i], alpha=0.5) 143 | 144 | #print(max_total, min_total) 145 | plt.legend() 146 | plt.xlabel('generations') 147 | plt.ylabel(searchlabel) 148 | plt.ylim((min_total,max_total)) 149 | 150 | plt.savefig(fst.addTrailingSlashIfNeeded(dir) + 'all_' + searchlabel + '__'.join(vary_param_vals) + '__' + fst.getDateString() + '.png') 151 | 152 | # For each one separately 153 | print('vary_param_vals', vary_param_vals) 154 | for i, (val, file_group) in enumerate(zip(vary_param_vals, vary_param_files)): 155 | #print(max_total, min_total) 156 | plt.clf() 157 | dat_array = np.array([np.loadtxt(fname) for fname in file_group]) 158 | avg = np.mean(dat_array, axis=0)*scale_factors[i] + offsets[i] 159 | std = np.std(dat_array, axis=0)*scale_factors[i] 160 | 161 | plt.plot(avg, color=line_cols[i], label=val) 162 | plt.legend() 163 | plt.xlabel('generations') 164 | plt.ylabel(searchlabel) 165 | plt.ylim((min_total,max_total)) 166 | plt.fill_between(np.array(range(len(avg))), avg - std, avg + std, facecolor=shade_cols[i], alpha=0.5) 167 | plt.savefig(fst.addTrailingSlashIfNeeded(dir) + searchlabel + '__' + val + '__' + fst.getDateString() + '.png') 168 | 169 | 170 | 171 | def plotPopulationProperty(dir, search_label, **kwargs): 172 | 173 | 174 | show_plot = kwargs.get('show_plot', False) 175 | save_plot = kwargs.get('save_plot', True) 176 | make_hist_gif = kwargs.get('make_hist_gif', True) 177 | 178 | # Find the values file 179 | prop_file_list = glob.glob(fst.addTrailingSlashIfNeeded(dir) + search_label + '*' + '.txt') 180 | 181 | assert len(prop_file_list)==1, 'there needs to be exactly one .txt file.' 
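    # The matched file is expected to hold a 2-D array: one row per generation and
    # one column per individual (e.g. a 50-generation run of a 15-agent population
    # loads as shape (50, 15) via np.loadtxt below).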
182 | 183 | prop_file = prop_file_list[0] 184 | 185 | prop_dat = np.loadtxt(prop_file) 186 | 187 | N_gen = prop_dat.shape[0] 188 | max_gif_frames = 299 189 | gif_save_period = ceil(N_gen/max_gif_frames) 190 | 191 | avg = np.mean(prop_dat) 192 | std = np.std(prop_dat) 193 | 194 | print('dat min: {:.2f}'.format(np.min(prop_dat))) 195 | print('dat max: {:.2f}'.format(np.max(prop_dat))) 196 | print('dat mean: {:.2f}'.format(avg)) 197 | print('dat std: {:.2f}'.format(std)) 198 | 199 | dat_lb = max(np.min(prop_dat), avg - 2*std) 200 | dat_ub = min(np.max(prop_dat), avg + 2*std) 201 | 202 | dat_lims = (dat_lb - 0.1*abs(dat_lb), dat_ub + 0.1*abs(dat_ub)) 203 | 204 | # Make histogram gif 205 | 206 | if make_hist_gif: 207 | 208 | gif_dir = fst.makeDir(fst.combineDirAndFile(dir, 'gif_imgs')) 209 | 210 | for i, gen_dat in enumerate(prop_dat): 211 | plt.clf() 212 | plt.hist(gen_dat, facecolor='dodgerblue', edgecolor='k', label=search_label, alpha=0.9, density=True) 213 | plt.axvline(np.mean(gen_dat), color='k', linestyle='dashed', linewidth=1) 214 | plt.xlim(dat_lims) 215 | plt.ylim((0, 1.0/len(gen_dat))) 216 | plt.title(f'generation {i}') 217 | plt.xlabel(search_label) 218 | plt.ylabel('counts') 219 | 220 | if save_plot: 221 | if i%gif_save_period == 0: 222 | fname = fst.combineDirAndFile(gif_dir, f'{i}.png') 223 | plt.savefig(fname) 224 | 225 | 226 | try: 227 | gif_name = fst.gifFromImages(gif_dir, f'{search_label}_hist', ext='.png', delay=5) 228 | print(gif_name) 229 | gif_basename = fst.fnameFromFullPath(gif_name) 230 | subprocess.check_call(['mv', gif_name, fst.combineDirAndFile(dir, gif_basename)]) 231 | subprocess.check_call(['rm', '-rf', gif_dir]) 232 | except: 233 | print('problem in creating gif') 234 | 235 | 236 | plt.clf() 237 | 238 | # Make time scatter plot 239 | 240 | # Right now this is in the structure where a row is a generation and each 241 | # entry of that row is an individ. 
We want to plot it per gen, so we need
242 |     # to make it a set of (generation, value) points (so 5 gens of 8 individs would
243 |     # go 5x8 -> 5x8x2 -> 40x2 -> 2x40).
244 |     prop_pts = np.array([[[i, val] for val in prop_dat[i]] for i in range(len(prop_dat))]) # Makes it 5x8x2
245 |     N_tot_entries = prop_dat.shape[0]*prop_dat.shape[1] # Total number of (generation, value) points, e.g. 40
246 |     prop_pts = np.reshape(prop_pts, (N_tot_entries, 2)) # Makes it 40x2
247 |     prop_pts = np.swapaxes(prop_pts, 0, 1) # Makes it 2x40
248 |
249 |     plt.plot(prop_pts[0], prop_pts[1], 'o', color='dodgerblue')
250 |     plt.xlabel('generations')
251 |     plt.ylabel(search_label)
252 |     plt.ylim(dat_lims)
253 |
254 |     if save_plot:
255 |         plt.savefig(fst.combineDirAndFile(dir, search_label + '_scatter_plot.png'))
256 |
257 |     if show_plot:
258 |         plt.show()
259 |
260 |     plt.clf()
261 |
262 |     # Make time std plot
263 |
264 |     gen_mean = np.mean(prop_dat, axis=1)
265 |     gen_std = np.std(prop_dat, axis=1)
266 |
267 |     line_cols = ['darkred', 'mediumblue', 'darkgreen', 'goldenrod', 'purple', 'darkorange', 'black']
268 |     shade_cols = ['tomato', 'dodgerblue', 'lightgreen', 'khaki', 'plum', 'peachpuff', 'lightgray']
269 |
270 |     plt.fill_between(np.array(range(len(gen_mean))), gen_mean - gen_std, gen_mean + gen_std, facecolor=shade_cols[0], alpha=0.5)
271 |     plt.plot(gen_mean, color=line_cols[0])
272 |
273 |     plt.xlabel('generations')
274 |     plt.ylabel(search_label)
275 |     plt.ylim(dat_lims)
276 |
277 |     if save_plot:
278 |         plt.savefig(fst.combineDirAndFile(dir, search_label + '_mean-std_plot.png'))
279 |
280 |     if show_plot:
281 |         plt.show()
282 |
283 |     plt.close()
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 | #
302 |
--------------------------------------------------------------------------------
/classes/Population.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | from EPANN import EPANN
4 | from copy import deepcopy
5 | import matplotlib.pyplot as plt
6 | import FileSystemTools as fst
7 | import RunTools as rt
8 | import numpy as np
9 | import subprocess
10 | from math import sqrt, floor
11 | import movie_combine
12 | import traceback as tb
13 |
14 | class Population:
15 |
16 |
17 |     def __init__(self, **kwargs):
18 |
19 |         self.agent_class = kwargs.get('agent_class', None)
20 |         assert self.agent_class is not None, 'Need to provide an agent class! exiting'
21 |
22 |         self.init_kwargs = kwargs
23 |
24 |         self.N_pop = kwargs.get('N_pop', 15)
25 |         self.mut_type = kwargs.get('mut_type', 'change_topo')
26 |         self.gauss_std = kwargs.get('std', 0.2)
27 |         self.best_N_frac = kwargs.get('best_N_frac', 1/5.0)
28 |
29 |         self.fname_notes = '{}_{}{}'.format(kwargs.get('fname_notes', ''), self.agent_class.__name__, kwargs.get('env_name', ''))
30 |         self.datetime_str = fst.getDateString()
31 |         self.base_dir = kwargs.get('base_dir', 'misc_runs')
32 |         self.dir = fst.combineDirAndFile(self.base_dir, 'evolve_{}_{}'.format(self.datetime_str, self.fname_notes))
33 |         fst.makeDir(self.dir)
34 |         self.plot_dir = fst.makeDir(fst.combineDirAndFile(self.dir, 'plots'))
35 |         print('run dir: ', self.dir)
36 |         pop_kwargs = {'run_dir' : self.dir}
37 |         both_kwargs = {**kwargs, **pop_kwargs}
38 |
39 |         self.population = [EPANN(**both_kwargs) for i in range(self.N_pop)]
40 |
41 |
42 |
43 |     def evolve(self, **kwargs):
44 |
45 |
46 |         start_time = fst.getCurTimeObj()
47 |
48 |         N_trials_per_agent = kwargs.get('N_trials_per_agent', 3)
49 |         N_episode_steps = kwargs.get('N_episode_steps', 400)
50 |         N_gen = kwargs.get('N_gen', 50)
51 |
52 |         N_runs_each_champion = kwargs.get('N_runs_each_champion', 5)
53 |         N_runs_with_best = kwargs.get('N_runs_with_best', 10)
54 |         assert N_runs_with_best > 0, 'Need at least one run with best individ!'
55 |         record_final_runs = kwargs.get('record_final_runs', False)
56 |         show_final_runs = kwargs.get('show_final_runs', False)
57 |
58 |         # Create a log file for the kwargs
59 |         log_fname = fst.combineDirAndFile(self.dir, f'log_{self.datetime_str}.txt')
60 |         fst.writeDictToFile({**self.init_kwargs, **kwargs}, log_fname)
61 |
62 |
63 |         best_FFs = []
64 |         mean_FFs = []
65 |
66 |         all_FFs = []
67 |         all_nodecounts = []
68 |         all_weightcounts = []
69 |         champion_FF_mean_std = [] # A list of [mean, std] pairs for runs of the current champion.
70 |
71 |
72 |         #try:
73 |         for i in range(N_gen):
74 |
75 |             best_FF = -100000000
76 |             mean_FF = 0
77 |
78 |             mean_Rs = []
79 |             for j, individ in enumerate(self.population):
80 |                 mean_episode_score = 0
81 |                 for run in range(N_trials_per_agent):
82 |                     mean_episode_score += individ.runEpisode(N_episode_steps)
83 |
84 |                 mean_episode_score = mean_episode_score/N_trials_per_agent
85 |                 mean_Rs.append([j, mean_episode_score])
86 |                 mean_FF += mean_episode_score
87 |                 if mean_episode_score > best_FF:
88 |                     best_FF = mean_episode_score
89 |
90 |             mean_FF = mean_FF/self.N_pop
91 |             best_FFs.append(best_FF)
92 |             mean_FFs.append(mean_FF)
93 |
94 |             mean_Rs_no_label = [x[1] for x in mean_Rs]
95 |
96 |             # Run the champion several times to get stats
97 |             champion_ind = self.sortByFitnessFunction(mean_Rs)[0][0]
98 |             champion_scores = []
99 |             for run in range(N_runs_each_champion):
100 |                 champion_scores.append(self.population[champion_ind].runEpisode(N_episode_steps))
101 |             champion_FF_mean_std.append([np.mean(champion_scores), np.std(champion_scores)])
102 |
103 |
104 |             # Update with progress
105 |             if i%max(1, int(N_gen/20))==0:
106 |                 print('\ngen {}. 
Best FF = {:.4f}, mean FF = {:.4f}'.format(i, best_FF, mean_FF)) 107 | self.plotPopHist(mean_Rs_no_label, 'pop_FF') 108 | if self.mut_type == 'change_topo': 109 | self.plotPopHist([len(epann.node_list) for epann in self.population], 'pop_nodecount') 110 | self.plotPopHist([len(epann.weights_list) for epann in self.population], 'pop_weightcount') 111 | fname = fst.combineDirAndFile(self.plot_dir, '{}_gen{}_{}.png'.format('bestNN', i, fst.getDateString())) 112 | self.population[champion_ind].plotNetwork(show_plot=False, save_plot=True, fname=fname, node_legend=True) 113 | 114 | #print('network sizes: ', [len(x.node_list) for x in self.population]) 115 | print('avg network size: {:.3f}'.format(sum([len(x.node_list) for x in self.population])/self.N_pop)) 116 | #print('# network connections: ', [len(x.weights_list) for x in self.population]) 117 | print('avg # network connections: {:.3f}'.format(sum([len(x.weights_list) for x in self.population])/self.N_pop)) 118 | 119 | all_FFs.append(mean_Rs_no_label) 120 | all_nodecounts.append([len(epann.node_list) for epann in self.population]) 121 | all_weightcounts.append([len(epann.weights_list) for epann in self.population]) 122 | 123 | # Get the next gen by mutating 124 | self.getNextGen(mean_Rs) 125 | 126 | 127 | print('\n\nRun took: ', fst.getTimeDiffStr(start_time), '\n\n') 128 | 129 | self.saveScore(best_FFs, 'bestscore') 130 | self.saveScore(mean_FFs, 'meanscore') 131 | self.saveScore(all_FFs, 'all_FFs') 132 | self.saveScore(all_nodecounts, 'nodecounts') 133 | self.saveScore(all_weightcounts, 'weightcounts') 134 | self.saveScore(champion_FF_mean_std, 'champion_FF_mean_std') 135 | 136 | # Plot best and mean FF curves for the population 137 | plt.subplots(1, 1, figsize=(8,8)) 138 | plt.plot(mean_FFs, color='dodgerblue', label='Pop. avg FF') 139 | plt.plot(best_FFs, color='tomato', label='Pop. best FF') 140 | plt.xlabel('generations') 141 | plt.ylabel('FF') 142 | plt.legend() 143 | fname = fst.combineDirAndFile(self.dir, '{}_{}.png'.format('FFplot', self.datetime_str)) 144 | plt.savefig(fname) 145 | 146 | plt.close() 147 | 148 | # Plot the mean and std for the champion of each generation 149 | champion_FF_mean_std = np.array(champion_FF_mean_std) 150 | champ_mean = champion_FF_mean_std[:,0] 151 | champ_std = champion_FF_mean_std[:,1] 152 | plt.fill_between( 153 | np.array(range(len(champ_mean))), 154 | champ_mean - champ_std, 155 | champ_mean + champ_std, 156 | facecolor='dodgerblue', alpha=0.5) 157 | 158 | plt.plot(champ_mean, color='mediumblue') 159 | plt.xlabel('generations') 160 | plt.ylabel('FF') 161 | fname = fst.combineDirAndFile(self.dir, '{}_{}.png'.format('champion_mean-std_plot', self.datetime_str)) 162 | plt.savefig(fname) 163 | 164 | # Get an avg final score for the best individ. You know this will be the best one because 165 | # the best one is preserved after getNextGen(). 166 | best_individ = self.population[0] 167 | 168 | # Save the NN of the best individ. 169 | bestNN_fname = fst.combineDirAndFile(self.dir, f'bestNN_{self.agent_class.__name__}_{self.datetime_str}') 170 | best_individ.saveNetworkToFile(fname=(bestNN_fname + '.json')) 171 | best_individ.plotNetwork(show_plot=False, save_plot=True, fname=(bestNN_fname + '.png'), node_legend=True) 172 | 173 | # Something annoying happening with showing vs recording the final runs, but I'll figure it out later. 
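        # These runs feed return_dict['best_individ_avg_score'] below; callers such as
        # PopTests.varyParam() read that key out of the returned dict.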
174 |         best_individ_scores = [best_individ.runEpisode(N_episode_steps,
175 |                                                         show_episode=show_final_runs,
176 |                                                         record_episode=record_final_runs,
177 |                                                         **kwargs) for i in range(N_runs_with_best)]
178 |         #best_individ.agent.closeEnv()
179 |         best_individ_avg_score = np.mean(best_individ_scores)
180 |
181 |         # Plot some more stuff with the saved data
182 |         try:
183 |             rt.plotPopulationProperty(self.dir, 'all_FFs', make_hist_gif=False)
184 |             rt.plotPopulationProperty(self.dir, 'weightcounts', make_hist_gif=False)
185 |         except:
186 |             print('\n\n')
187 |             print(tb.format_exc())
188 |             print('plotPopulationProperty() failed, continuing')
189 |
190 |
191 |
192 |         try:
193 |             if record_final_runs:
194 |                 N_side = min(3, floor(sqrt(N_runs_with_best)))
195 |                 movie_dir = best_individ.agent.record_dir
196 |                 movie_combine.combineMovieFiles(path=movie_dir, grid_size=f'{N_side}x{N_side}', make_gif=True)
197 |         except:
198 |             print('\n\n')
199 |             print(tb.format_exc())
200 |             print('failed combining movies into single panel')
201 |
202 |         return_dict = {}
203 |         return_dict['best_FFs'] = best_FFs
204 |         return_dict['mean_FFs'] = mean_FFs
205 |         return_dict['best_individ_avg_score'] = best_individ_avg_score
206 |
207 |         return(return_dict)
208 |
209 |
210 |     def getNextGen(self, FF_list):
211 |
212 |         '''
213 |         This first sorts the pop by the (index, FF) list passed to it.
214 |         Then it takes the best_N of these indices in order. It starts the
215 |         new_pop with a clone of the best individ from the last gen. Then it adds
216 |         to new_pop by mutating the best_N until the pop is filled again.
217 |
218 |         So, you can assume that for the new pop., pop[0] is the best one of the
219 |         LAST generation.
220 |
221 |         '''
222 |
223 |         pop_indices_sorted = self.sortByFitnessFunction(FF_list)
224 |         best_N = max(int(self.N_pop*self.best_N_frac), 2)
225 |         #best_N = 1
226 |         best_N_indices = [x[0] for x in pop_indices_sorted[:best_N]]
227 |
228 |         new_pop = [self.population[best_N_indices[0]].clone()]
229 |         mod_counter = 0
230 |
231 |         while len(new_pop) < self.N_pop:

[... span lost in extraction: an unescaped '<' here was swallowed along with everything up to the next '>', taking the rest of getNextGen() (which fills new_pop by cloning and mutating the best_N individuals, per the docstring above), the remaining Population.py methods referenced earlier (sortByFitnessFunction, saveScore, plotPopHist), and the opening of classes/EPANN.py. The two lines below are reconstructed from the surviving '0:' fragment and the parallel mutateChangeWeight()/mutateRemoveWeight() methods. ...]

261 |     def mutateAddNode(self):
262 |         if len(self.weights_list)>0:
263 |             par_index, child_index = random.choice(list(self.weights_list))
264 |             self.addNodeInBetween(par_index, child_index)
265 |
266 |
267 |     def mutateAddWeight(self, std=0.1):
268 |         N_attempts = 4
269 |         i = 0
270 |         while True:
271 |             if i>N_attempts:
272 |                 return(0)
273 |             else:
274 |                 i += 1
275 |             node_1_ind = random.choice(list(range(len(self.node_list))))
276 |
277 |             # No self-connections
278 |             node_2_options = [ind for ind in range(len(self.node_list)) if ind != node_1_ind]
279 |
280 |             if (node_1_ind in self.input_node_indices) or (node_1_ind == self.bias_node_index):
281 |                 node_2_options = [ind for ind in node_2_options if (ind not in self.input_node_indices) and (ind != self.bias_node_index)]
282 |                 weight_connection_options = [(node_1_ind, ind) for ind in node_2_options if ((node_1_ind, ind) not in self.weights_list)]
283 |
284 |             elif node_1_ind in self.output_node_indices:
285 |                 node_2_options = [ind for ind in node_2_options if ind not in self.output_node_indices]
286 |                 weight_connection_options = [(ind, node_1_ind) for ind in node_2_options if ((ind, node_1_ind) not in self.weights_list)]
287 |
288 |             else:
289 |                 # if it's neither an input nor an output
290 |
291 |                 # The options if node 2 is going to be the parent.
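                # (i.e. candidate edges (ind, node_1_ind): ind can't be an output node, and
                # node_1_ind must not already feed ind, directly or indirectly, since the new
                # edge ind -> node_1_ind would then close a cycle.)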
292 | node_2_weight_options_parent = [(ind, node_1_ind) for ind in node_2_options if (ind not in self.output_node_indices) and (not self.getsInputFrom(ind, node_1_ind))] 293 | 294 | # In both cases, we need to check that either node_2 is not in prop_order 295 | # (meaning it can go anywhere, provided it's not i/o), OR that 296 | # it doesn't get indirect input from ind. 297 | # 298 | # The options if node 2 is going to be the child. 299 | node_2_weight_options_child = [(node_1_ind, ind) for ind in node_2_options if ((ind not in self.input_node_indices) and (ind != self.bias_node_index)) and (not self.getsInputFrom(node_1_ind, ind))] 300 | 301 | # Combine them. 302 | weight_connection_options = node_2_weight_options_parent + node_2_weight_options_child 303 | weight_connection_options = [w for w in weight_connection_options if w not in self.weights_list] 304 | 305 | if len(weight_connection_options)==0: 306 | # If there aren't any options by this point, continue to try again 307 | continue 308 | else: 309 | weight_connection_tuple = random.choice(weight_connection_options) 310 | break 311 | 312 | self.addConnectingWeight(weight_connection_tuple, val=None, std=std) 313 | 314 | 315 | def mutateChangeWeight(self, std=0.1): 316 | if len(self.weights_list)>0: 317 | par_index, child_index = random.choice(list(self.weights_list)) 318 | self.print('changing weight between {} and {}'.format(par_index, child_index)) 319 | self.node_list[par_index].mutateOutputWeight(child_index, std=std) 320 | 321 | 322 | def mutateRemoveWeight(self): 323 | if len(self.weights_list)>0: 324 | par_index, child_index = random.choice(list(self.weights_list)) 325 | self.print('removing weight between {} and {}'.format(par_index, child_index)) 326 | self.removeConnectingWeight((par_index, child_index)) 327 | 328 | 329 | def mutate(self, std=0.1): 330 | 331 | self.print('\n\nbefore mutate:') 332 | if self.verbose: 333 | self.printNetwork() 334 | 335 | if random.random() < self.node_add_chance: 336 | # Add a node by splitting an existing weight 337 | self.mutateAddNode() 338 | 339 | 340 | if random.random() < self.weight_add_chance: 341 | # Add weight between two nodes 342 | self.mutateAddWeight(std=std) 343 | 344 | 345 | if random.random() < self.weight_change_chance: 346 | # Change weight 347 | self.mutateChangeWeight(std=std) 348 | 349 | 350 | if random.random() < self.weight_remove_chance: 351 | # Remove weight 352 | self.mutateRemoveWeight() 353 | 354 | 355 | self.print('\nafter mutate:') 356 | if self.verbose: 357 | self.printNetwork() 358 | 359 | 360 | 361 | 362 | def getsInputFrom(self, n1_index, n2_index): 363 | 364 | # This is to check if n1 gets input from n2, indirectly. 365 | 366 | n1 = self.node_list[n1_index] 367 | n2 = self.node_list[n2_index] 368 | lineage_q = Queue() 369 | # You need this! Or it won't check its own parents! 370 | lineage_q.put(n1_index) 371 | [lineage_q.put(n) for n in n1.input_indices] 372 | 373 | while lineage_q.qsize() > 0: 374 | next = lineage_q.get() 375 | if n2_index in self.node_list[next].input_indices: 376 | return(True) 377 | else: 378 | [lineage_q.put(n) for n in self.node_list[next].input_indices] 379 | 380 | return(False) 381 | 382 | 383 | 384 | def propagateNodeOutput(self, node_index): 385 | 386 | # This assumes that the propagate_order list is already sorted! 387 | # If it isn't, you'll get some bad results. 
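        # e.g. with connections 0 -> 2 and 2 -> 1, propagate_order must visit 0, then 2,
        # then 1; visiting node 1 before node 2 would read a value node 2 hasn't computed yet.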
388 | node = self.node_list[node_index] 389 | 390 | for target_node_index in node.getOutputIndices(): 391 | self.node_list[target_node_index].addToInputsReceived(node.getValue()*node.output_weights[target_node_index]) 392 | 393 | 394 | def forwardPass(self, input_vec): 395 | 396 | self.clearAllNodes() 397 | 398 | # Put the input vec into the input nodes 399 | for i, index in enumerate(self.input_node_indices): 400 | self.node_list[index].value = input_vec[i] 401 | 402 | # For each node in the sorted propagate list, propagate to its children 403 | for ind in self.propagate_order: 404 | self.propagateNodeOutput(ind) 405 | 406 | output_vec = np.array([self.node_list[ind].getValue() for ind in self.output_node_indices]) 407 | 408 | if self.action_space_type == 'discrete': 409 | action = self.epsGreedyOutput(output_vec) 410 | elif self.action_space_type == 'continuous': 411 | # Need to fix if there are several cont. directions, but won't deal with that 412 | # for now. Actually, it seems like even when it's one continuous action, you're 413 | # supposed to supply it a list?? 414 | action = output_vec 415 | 416 | return(action) 417 | 418 | 419 | def epsGreedyOutput(self, vec): 420 | if random.random() < self.epsilon: 421 | return(random.randint(0, len(vec)-1)) 422 | else: 423 | return(self.greedyOutput(vec)) 424 | 425 | 426 | def greedyOutput(self, vec): 427 | return(np.argmax(vec)) 428 | 429 | 430 | def softmaxOutput(self, vec): 431 | a = np.array(vec) 432 | a = np.exp(a) 433 | a = a/sum(a) 434 | return(np.random.choice(list(range(len(a))), p=a)) 435 | 436 | 437 | 438 | def setMaxEpisodeSteps(self, N_steps): 439 | self.agent.setMaxEpisodeSteps(N_steps) 440 | 441 | 442 | def clearAllNodes(self): 443 | [n.clearNode() for i,n in enumerate(self.node_list) if i!=self.bias_node_index] 444 | 445 | 446 | def runEpisode(self, N_steps, **kwargs): 447 | 448 | 449 | R_tot = 0 450 | Rs = [] 451 | 452 | show_episode = kwargs.get('show_episode', False) 453 | record_episode = kwargs.get('record_episode', False) 454 | 455 | if show_episode: 456 | self.createFig() 457 | 458 | if record_episode: 459 | self.agent.setMonitorOn(show_run=show_episode) 460 | 461 | self.agent.initEpisode() 462 | 463 | for i in range(N_steps): 464 | self.clearAllNodes() 465 | 466 | if i%int(N_steps/10)==0: 467 | self.print('R_tot = {:.3f}'.format(R_tot)) 468 | 469 | 470 | s = self.agent.getStateVec() 471 | a = self.forwardPass(s) 472 | self.print('s = {}, a = {}'.format(s, a)) 473 | 474 | r, s, done = self.agent.iterate(a) 475 | 476 | R_tot += r 477 | Rs.append(R_tot) 478 | 479 | if done: 480 | #return(R_tot) 481 | break 482 | 483 | if show_episode or record_episode: 484 | if self.render_type == 'matplotlib': 485 | self.agent.drawState(self.axes[0]) 486 | self.axes[1].clear() 487 | self.axes[1].plot(Rs) 488 | self.fig.canvas.draw() 489 | elif self.render_type == 'gym': 490 | self.agent.drawState() 491 | 492 | 493 | if record_episode: 494 | print('R_tot = {:.3f}'.format(R_tot)) 495 | 496 | self.print('R_tot/N_steps = {:.3f}'.format(R_tot/N_steps)) 497 | 498 | return(R_tot) 499 | 500 | 501 | 502 | 503 | def gaussMutate(self, std=0.1): 504 | # This mutates ALL of a node's output weights! 
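        # Each output weight gets an independent draw from a zero-mean Gaussian with
        # standard deviation `std` added to it (e.g. std=0.1 typically shifts a weight
        # of 0.50 to somewhere around 0.40-0.60).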
505 | for n in self.node_list: 506 | for w in n.getOutputIndices(): 507 | n.output_weights[w] += np.random.normal(scale=std) 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | def clone(self): 517 | clone = deepcopy(self) 518 | return(clone) 519 | 520 | 521 | def createFig(self): 522 | if self.render_type == 'matplotlib': 523 | self.fig, self.axes = plt.subplots(1,2, figsize=(16,8)) 524 | plt.show(block=False) 525 | 526 | 527 | 528 | 529 | def print(self, str): 530 | 531 | if self.verbose: 532 | print(str) 533 | 534 | 535 | 536 | def printNetwork(self): 537 | print('\n') 538 | for i, n in enumerate(self.node_list): 539 | print('\nnode ', i) 540 | print('input indices:', n.input_indices) 541 | print('output indices: ', n.getOutputIndices()) 542 | print('output weights: ', n.getOutputWeightStr()) 543 | 544 | print() 545 | 546 | 547 | def plotNetwork(self, show_plot=True, save_plot=False, fname=None, node_legend=False): 548 | 549 | fig, ax = plt.subplots(1, 1, figsize=(12,8)) 550 | DG = nx.DiGraph() 551 | 552 | other_node_indices = [i for i,n in enumerate(self.node_list) if ((i not in self.input_node_indices) and (i not in self.output_node_indices) and (i != self.bias_node_index))] 553 | 554 | DG.add_node(self.bias_node_index) 555 | 556 | for i in self.input_node_indices: 557 | DG.add_node(i) 558 | 559 | for i in self.output_node_indices: 560 | DG.add_node(i) 561 | 562 | # I think you have to add this, because if you have a node that doesn't have any connections 563 | # and it's not I/O/B, then it will never get entered into DG without this. 564 | for i in other_node_indices: 565 | DG.add_node(i) 566 | 567 | for n in self.node_list: 568 | for o in n.getOutputIndices(): 569 | DG.add_edges_from([(n.node_index, o)]) 570 | 571 | pos = nx.drawing.nx_agraph.graphviz_layout(DG, prog='dot') 572 | 573 | try: 574 | nx.draw_networkx_nodes(DG, nodelist=self.input_node_indices, pos=pos, node_color='lightgreen', node_size=600) 575 | nx.draw_networkx_nodes(DG, nodelist=self.output_node_indices, pos=pos, node_color='orange', node_size=600) 576 | nx.draw_networkx_nodes(DG, nodelist=[self.bias_node_index], pos=pos, node_color='forestgreen', node_size=600) 577 | nx.draw_networkx_nodes(DG, nodelist=other_node_indices, pos=pos, node_color='plum', node_size=600) 578 | except: 579 | print('problem drawing nx nodes. 
pos:') 580 | print(pos) 581 | exit() 582 | 583 | for w in self.weights_list: 584 | weight = self.node_list[w[0]].output_weights[w[1]] 585 | if weight < 0: 586 | nx.draw_networkx_edges(DG, pos=pos, edgelist=[w], width=4.0, alpha=min(abs(weight), 1), edge_color='tomato') 587 | 588 | if weight >= 0: 589 | nx.draw_networkx_edges(DG, pos=pos, edgelist=[w], width=4.0, alpha=min(abs(weight), 1), edge_color='dodgerblue') 590 | 591 | labels = {i:str(i) for i in range(len(self.node_list))} 592 | nx.draw_networkx_labels(DG, pos=pos, labels=labels, font_size=14) 593 | edge_labels = {w:'{:.2f}'.format(self.node_list[w[0]].output_weights[w[1]]) for w in self.weights_list} 594 | nx.draw_networkx_edge_labels(DG, pos=pos, edge_labels=edge_labels, font_size=10, bbox={'alpha':0.2, 'pad':0.0}, label_pos=0.85) 595 | 596 | plt.xticks([]) 597 | plt.yticks([]) 598 | plt.subplots_adjust(left=.2, bottom=0, right=1, top=1, wspace=1, hspace=0) 599 | ax.axis('off') 600 | 601 | if node_legend: 602 | if (self.agent.state_labels is not None) and (self.agent.action_labels is not None): 603 | props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) 604 | 605 | percent_offset = 0.02 606 | 607 | bias_str = 'Bias: node {}\n\n'.format(self.bias_node_index) 608 | input_str = bias_str + 'Inputs:\n\n' + '\n'.join(['node {} = {}'.format(ind, self.agent.state_labels[i]) for i, ind in enumerate(self.input_node_indices)]) 609 | ax.text(-percent_offset, (1-3*percent_offset), input_str, transform=ax.transAxes, fontsize=10, verticalalignment='top', horizontalalignment='right', bbox=props) 610 | 611 | output_str = 'Outputs:\n\n' + '\n'.join(['node {} = {}'.format(ind, self.agent.action_labels[i]) for i, ind in enumerate(self.output_node_indices)]) 612 | ax.text(-percent_offset, 3*percent_offset, output_str, transform=ax.transAxes, fontsize=10, verticalalignment='bottom', horizontalalignment='right', bbox=props) 613 | textstr = input_str + '\n\n' + output_str 614 | 615 | 616 | # place a text box in upper left in axes coords 617 | 618 | if save_plot: 619 | if fname is not None: 620 | plt.savefig(fname) 621 | else: 622 | default_fname = 'misc_runs/{}_NN_{}.png'.format(self.agent_class.__name__, fst.getDateString()) 623 | plt.savefig(default_fname) 624 | 625 | if show_plot: 626 | plt.show() 627 | 628 | plt.close() 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | ''' 646 | SCRAP 647 | 648 | 649 | 650 | 651 | 652 | 653 | # When finished, so all output nodes should be full 654 | self.print('\n\nProp. 
done, output node values:') 655 | for ind in self.output_node_indices: 656 | self.print('Node {} output: {:.3f}'.format(ind, self.node_list[ind].value)) 657 | 658 | 659 | 660 | 661 | if par_index != self.bias_node_index: 662 | self.node_list[self.bias_node_index].addToOutputWeights(new_node.node_index) 663 | self.node_list[self.bias_node_index].output_weights[new_node.node_index] = 0 664 | new_node.addToInputIndices(self.bias_node_index) 665 | 666 | 667 | 668 | # Bias node 669 | bias_node = Node(len(self.node_list)) 670 | bias_node.setToBiasNode() 671 | #bias_node.setOutputIndices(self.output_node_indices) 672 | self.node_list.append(bias_node) 673 | 674 | 675 | # Add input nodes 676 | for i in range(self.N_inputs): 677 | new_node = Node(len(self.node_list)) 678 | new_node.setOutputIndices(self.output_node_indices) 679 | #uself.weights_list.append() 680 | new_node.setRandomOutputWeights() 681 | new_node.setToInputNode() 682 | self.node_list.append(new_node) 683 | 684 | 685 | # Add output nodes 686 | for i in range(self.N_total_outputs): 687 | new_node = Node(len(self.node_list)) 688 | new_node.setInputIndices(self.input_node_indices) 689 | #new_node.addToInputIndices(self.bias_node_index) 690 | new_node.setToOutputNode() 691 | self.node_list.append(new_node) 692 | 693 | # Add hidden layer nodes 694 | for i in range(self.N_init_hidden_nodes): 695 | new_node = Node(len(self.node_list)) 696 | new_node.setInputIndices(self.input_node_indices) 697 | #new_node.addToInputIndices(self.bias_node_index) 698 | new_node.setOutputIndices(self.output_node_indices) 699 | new_node.setRandomOutputWeights() 700 | 701 | #self.node_list[self.bias_node_index].addToOutputWeights(new_node.node_index) 702 | 703 | for ii in self.input_node_indices: 704 | self.node_list[ii].addToOutputWeights(new_node.node_index) 705 | 706 | for o in self.output_node_indices: 707 | self.node_list[o].addToInputIndices(new_node.node_index) 708 | 709 | self.node_list.append(new_node) 710 | 711 | # Set initial random output weight 712 | for i, n in enumerate(self.node_list): 713 | N_incoming_connect = n.getNInputs() 714 | for j in n.input_indices: 715 | self.node_list[j].output_weights[i] = np.random.normal(scale=(1.0/N_incoming_connect)) 716 | 717 | # Set all the bias weights to 0 to start. 718 | for i in self.node_list[self.bias_node_index].getOutputIndices(): 719 | self.node_list[self.bias_node_index].output_weights[i] = 0 720 | 721 | self.node_list[self.bias_node_index].value = 1 722 | 723 | 724 | 725 | if (node_1_ind not in self.propagate_order) and not (): 726 | # This one is easy: if it's not in propagate_order, then it's not connected to anything else, 727 | # so we can attach it to any other. 728 | node_2_ind = random.choice(node_2_options) 729 | if (node_2_ind in self.input_node_indices) or node_2_ind == self.bias_node_index: 730 | weight_connection_tuple = (node_2_ind, node_1_ind) 731 | 732 | elif node_2_ind in self.output_node_indices: 733 | weight_connection_tuple = (node_1_ind, node_2_ind) 734 | 735 | else: 736 | if random.random() < 0.5: 737 | weight_connection_tuple = (node_1_ind, node_2_ind) 738 | else: 739 | weight_connection_tuple = (node_2_ind, node_1_ind) 740 | 741 | break 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | 750 | 751 | 752 | 753 | 754 | 755 | ''' 756 | 757 | 758 | # 759 | --------------------------------------------------------------------------------
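A minimal usage sketch (not part of the repo): it is assembled only from names visible above -- Population's kwargs, evolve()'s kwargs and return dict, and the agent classes under scrap_old_test -- so treat the import paths and parameter values as illustrative assumptions rather than a canonical entry point. EPANN may also require kwargs (e.g. render_type) not shown in this section.

import sys
sys.path.append('./classes')
sys.path.append('./scrap_old_test')  # assumption: agent classes live here in this tree

from Population import Population
from PuckworldAgent import PuckworldAgent  # any class exposing the initEpisode()/getStateVec()/iterate() interface used by EPANN.runEpisode()

# Build a population of 15 topology-evolving networks around the agent,
# evolve for 50 generations, then pull the summary stats out of the returned dict.
p = Population(agent_class=PuckworldAgent, N_pop=15, mut_type='change_topo', std=0.2)
results = p.evolve(N_gen=50, N_trials_per_agent=3, N_episode_steps=400, N_runs_with_best=10)

print('best FF of final generation:', results['best_FFs'][-1])
print('avg score of best individual:', results['best_individ_avg_score'])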