├── .gitignore
├── misc
    └── cover_img_neat.png
├── __pycache__
    ├── NN.cpython-36.pyc
    ├── EPANN.cpython-36.pyc
    ├── Node.cpython-36.pyc
    ├── agent1.cpython-36.pyc
    ├── GymAgent.cpython-36.pyc
    ├── PopTests.cpython-36.pyc
    ├── Population.cpython-36.pyc
    ├── Walker_1D.cpython-36.pyc
    ├── CartPoleAgent.cpython-36.pyc
    ├── CartpoleAgent.cpython-36.pyc
    ├── PendulumAgent.cpython-36.pyc
    ├── movie_combine.cpython-36.pyc
    ├── profileOutput.cpython-36.pyc
    ├── LunarLanderAgent.cpython-36.pyc
    └── PuckworldAgent.cpython-36.pyc
├── scrap_old_test
    ├── networkx_test.py
    ├── Walker_1D.py
    ├── LunarLanderAgent.py
    ├── CartPoleAgent.py
    ├── PendulumAgent.py
    ├── PuckworldAgent.py
    └── agent1.py
├── createEnvJson.py
├── gym_env_info.json
├── README.md
├── ev1.py
├── ablation_test.py
├── movie_combine.py
└── classes
    ├── GymAgent.py
    ├── Node.py
    ├── FileSystemTools.py
    ├── PopTests.py
    ├── Population.py
    └── EPANN.py


/.gitignore:
--------------------------------------------------------------------------------
1 | misc_runs/
2 | save_runs/
3 | scrap_old_test*
4 | misc_runs*
5 | save_runs*
6 | 
7 | 


--------------------------------------------------------------------------------
/misc/cover_img_neat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/misc/cover_img_neat.png


--------------------------------------------------------------------------------
/__pycache__/NN.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/NN.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/EPANN.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/EPANN.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/Node.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Node.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/agent1.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/agent1.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/GymAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/GymAgent.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/PopTests.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PopTests.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/Population.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Population.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/Walker_1D.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Walker_1D.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/CartPoleAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/CartPoleAgent.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/CartpoleAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/CartpoleAgent.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/PendulumAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PendulumAgent.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/movie_combine.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/movie_combine.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/profileOutput.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/profileOutput.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/LunarLanderAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/LunarLanderAgent.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/PuckworldAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PuckworldAgent.cpython-36.pyc


--------------------------------------------------------------------------------
/scrap_old_test/networkx_test.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from EPANN import EPANN
 3 | from LunarLanderAgent import LunarLanderAgent
 4 | 
 5 | 
 6 | e = EPANN(agent_class=LunarLanderAgent, render_type='gym', N_init_hidden_nodes=0, init_IO_weights=True)
 7 | 
 8 | e.plotNetwork(show_plot=True, node_legend=True)
 9 | 
10 | 
11 | 
12 | 
13 | 
14 | 
15 | #
16 | 


--------------------------------------------------------------------------------
/createEnvJson.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('./classes')
 3 | import json
 4 | 
 5 | env_info_dict = {
 6 | 
 7 | 'Pendulum' : {
 8 | 'gym_env_name' : 'Pendulum-v0',
 9 | 'state_labels' : ['cos(ang)', 'sin(ang)', 'ang_vel'],
10 | 'action_labels' : ['torque'],
11 | 'action_space_type' : 'continuous',
12 | 'max_episode_steps' : 200
13 | },
14 | 
15 | 'LunarLander' : {
16 | 'gym_env_name' : 'LunarLander-v2',
17 | 'state_labels' : ['pos_x', 'pos_y', 'v_x', 'v_y', 'angle', 'v_ang'],
18 | 'action_labels' : ['nothing', 'engine_L', 'engine_main', 'engine_R'],
19 | 'action_space_type' : 'discrete',
20 | 'max_episode_steps' : 500
21 | },
22 | 
23 | 'CartPole' : {
24 | 'gym_env_name' : 'CartPole-v0',
25 | 'state_labels' : ['pos_cart', 'v_cart','pole_angle', 'v_poletip'],
26 | 'action_labels' : ['cart_L', 'cart_R',],
27 | 'action_space_type' : 'discrete',
28 | 'max_episode_steps' : 200
29 | },
30 | 
31 | 
32 | }
33 | 
34 | 
35 | fname = 'gym_env_info.json'
36 | 
37 | with open(fname, 'w') as outfile:
38 |     json.dump(env_info_dict, outfile, indent=4)
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | #
49 | 


--------------------------------------------------------------------------------
/gym_env_info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "Pendulum": {
 3 |         "gym_env_name": "Pendulum-v0",
 4 |         "state_labels": [
 5 |             "cos(ang)",
 6 |             "sin(ang)",
 7 |             "ang_vel"
 8 |         ],
 9 |         "action_labels": [
10 |             "torque"
11 |         ],
12 |         "action_space_type": "continuous",
13 |         "max_episode_steps": 200
14 |     },
15 |     "LunarLander": {
16 |         "gym_env_name": "LunarLander-v2",
17 |         "state_labels": [
18 |             "pos_x",
19 |             "pos_y",
20 |             "v_x",
21 |             "v_y",
22 |             "angle",
23 |             "v_ang"
24 |         ],
25 |         "action_labels": [
26 |             "nothing",
27 |             "engine_L",
28 |             "engine_main",
29 |             "engine_R"
30 |         ],
31 |         "action_space_type": "discrete",
32 |         "max_episode_steps": 500
33 |     },
34 |     "CartPole": {
35 |         "gym_env_name": "CartPole-v0",
36 |         "state_labels": [
37 |             "pos_cart",
38 |             "v_cart",
39 |             "pole_angle",
40 |             "v_poletip"
41 |         ],
42 |         "action_labels": [
43 |             "cart_L",
44 |             "cart_R"
45 |         ],
46 |         "action_space_type": "discrete",
47 |         "max_episode_steps": 200
48 |     }
49 | }


--------------------------------------------------------------------------------
/scrap_old_test/Walker_1D.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | 
 4 | 
 5 | class Walker_1D:
 6 | 
 7 | 
 8 |     def __init__(self):
 9 | 
10 |         self.lims = np.array([-1.0, 1.0])
11 |         self.width = self.lims[1] - self.lims[0]
12 | 
13 |         self.step_size = self.width/50.0
14 | 
15 |         self.position = 0
16 |         self.target_position = None
17 | 
18 |         self.N_state_terms = len(self.getStateVec())
19 |         self.N_actions = 2
20 | 
21 | 
22 | 
23 |     def getStateVec(self):
24 |         return(np.array([self.position, self.target_position]))
25 | 
26 | 
27 |     def initEpisode(self):
28 |         self.resetPosition()
29 |         self.resetTarget()
30 | 
31 | 
32 |     def resetTarget(self):
33 | 
34 |         x = np.random.random()
35 |         self.target_position = self.lims[0] + self.width*x
36 |         # print('new target pos: {:.3f}'.format(self.target_position))
37 | 
38 | 
39 |     def resetPosition(self):
40 |         self.position = 0
41 | 
42 | 
43 |     def iterate(self, action):
44 |         # Action 0 is go L, action 1 is go R.
45 |         add_x = (action - 0.5)*2
46 |         # maps 0,1 to -1,1
47 |         self.position += add_x*self.step_size
48 |         self.position = max(self.position, self.lims[0] + self.step_size)
49 |         self.position = min(self.position, self.lims[1] - self.step_size)
50 |         return(self.reward(), self.getStateVec(), False)
51 | 
52 | 
53 | 
54 |     def reward(self):
55 | 
56 |         if abs(self.position - self.target_position) <= 1.2*self.step_size:
57 |             self.resetTarget()
58 |             return(1.0)
59 |         else:
60 |             return(-0.01)
61 | 
62 | 
63 | 
64 |     def drawState(self, ax):
65 | 
66 |         ax.clear()
67 |         ax.set_xlim(tuple(self.lims))
68 |         ax.set_ylim(tuple(self.lims))
69 | 
70 |         ax.set_xlabel('x')
71 |         ax.set_ylabel('y')
72 |         ax.set_aspect('equal')
73 | 
74 |         ag = plt.Circle((self.position, 0), 0.03, color='tomato')
75 |         ax.add_artist(ag)
76 | 
77 |         if self.target_position is not None:
78 |             target = plt.Circle((self.target_position, 0), 0.03, color='seagreen')
79 |             ax.add_artist(target)
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 
87 | #
88 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Experiments with playing OpenAI games with NEAT
 3 | ==========================================
 4 | 
 5 | <p align="center">
 6 |   <img width="800" height="367" src="misc/cover_img_neat.png">
 7 | </p>
 8 | 
 9 | Overview
10 | --------------------------------
11 | 
12 | This project uses Kenneth Stanley's popular [NEAT framework](https://en.wikipedia.org/wiki/Neuroevolution_of_augmenting_topologies) to evolve neural networks to play OpenAI gym games. `Node` objects are the basic unit of `EPANN` objects, a collection of which forms the `Population` object. Tests to compare different population parameters can be done with `PopTests.py`.
13 | 
14 | 
15 | Main scripts
16 | -------------------------------
17 | 
18 | These are the scripts I run, that use the classes described below. They live in the main dir. A brief description:
19 | 
20 | * `ev1.py` - Creates a `Population` object for a given class and evolves the population.
21 | * `ablation_test.py` - Takes a fully formed network and repeatedly removes a connection and then evaluates the FF of that network, starting from the smallest connection weight up, to find the critical ones.
22 | * `movie_combine.py` - Combines several movie files into a single movie file in a grid format.
23 | * `createEnvJson.py` - Writes a dict of info about the gym envs (or other envs) to use for labels, etc
24 | 
25 | 
26 | Classes
27 | --------------------------------
28 | 
29 | * `Node.py` - The basic unit of the network. Can be set to be an input, output, or bias node.
30 | * `EPANN.py` - The network. It starts from just input, bias, and output nodes. The number of I/O nodes are based on the number of inputs/outputs of the agent class being simulated. Nodes and connections are then added and removed via mutations. It is also responsible for running an episode of the agent class to evaluate the FF.
31 | * `Population.py` - This creates a population of `EPANN` objects and repeatedly evaluates their FF's, sorts them via their FF's, and then keeps and mutates the best of the population. This is all done via its `evolve()` function.
32 | * `PopTests.py` - This does several evolutions of different `Population` objects, with different parameters, to compare how different population parameters (trading off `N_gen` vs `N_pop` for example) affects evolution.
33 | * `GymAgent.py` - This is a generic wrapper class for `gym` agents.
34 | * `FileSystemTools.py` - Just a little list of custom functions that I found myself using often, mostly string formatting type stuff.
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | #
45 | 


--------------------------------------------------------------------------------
/ev1.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('./classes')
  3 | from EPANN import EPANN
  4 | from Population import Population
  5 | import RunTools as rt
  6 | from GymAgent import GymAgent
  7 | from time import time
  8 | import numpy as np
  9 | 
 10 | ea = EPANN(agent_class=GymAgent, env_name='CartPole')
 11 | 
 12 | #ea.plotNetwork()
 13 | 
 14 | ea.addConnectingWeight((0,4))
 15 | ea.addConnectingWeight((1,4))
 16 | ea.addNodeInBetween(1,4)
 17 | 
 18 | ea.addConnectingWeight((2,5))
 19 | ea.addNode()
 20 | ea.addConnectingWeight((2,6))
 21 | ea.addConnectingWeight((3,4))
 22 | ea.addConnectingWeight((3,5))
 23 | ea.addConnectingWeight((6,5))
 24 | #ea.addAtomInBetween((2,5))
 25 | 
 26 | 
 27 | N_tests = 100000
 28 | 
 29 | inputs = np.random.random((N_tests, 4))
 30 | 
 31 | st = time()
 32 | for i in range(N_tests):
 33 | 
 34 |     ea.forwardPass(inputs[i])
 35 | 
 36 | 
 37 | print('time elapsed:', time() - st)
 38 | 
 39 | #ea.plotNetwork()
 40 | 
 41 | 
 42 | 
 43 | 
 44 | exit()
 45 | 
 46 | 
 47 | p1 = Population(agent_class=GymAgent, env_name='CartPole', N_pop=64, mut_type='change_topo', std=1.0, render_type='gym')
 48 | 
 49 | p1.evolve(N_gen=128, N_episode_steps=200, N_trials_per_agent=2, N_runs_with_best=2, record_final_runs=False, show_final_runs=False)
 50 | 
 51 | exit(0)
 52 | 
 53 | 
 54 | 
 55 | 
 56 | evolve_params = {
 57 | 'N_runs' : 3,
 58 | 'agent_class' : GymAgent,
 59 | 'env_name' : 'LunarLander',
 60 | 'N_pop' : 64,
 61 | 'mut_type' : 'change_topo',
 62 | 'std' : [0.01, 0.1, 1, 10],
 63 | 'N_gen' : 256,
 64 | 'N_trials_per_agent' : 2,
 65 | 'N_runs_with_best' : 9,
 66 | 'record_final_runs' : True,
 67 | 'show_final_runs' : False
 68 | }
 69 | 
 70 | 
 71 | rt.varyParam(object_class=Population, run_fn=Population.evolve, run_result_var='best_individ_avg_score', **evolve_params)
 72 | 
 73 | exit()
 74 | 
 75 | 
 76 | 
 77 | 
 78 | evolve_params = {
 79 | 'N_runs' : 3,
 80 | 'agent_class' : GymAgent,
 81 | 'env_name' : 'LunarLander',
 82 | 'N_pop' : 64,
 83 | 'mut_type' : 'change_topo',
 84 | 'std' : [0.01, 0.1, 1.0, 10.0],
 85 | 'N_gen' : 256,
 86 | 'N_trials_per_agent' : 2,
 87 | 'N_runs_with_best' : 9,
 88 | 'record_final_runs' : True,
 89 | 'show_final_runs' : False
 90 | }
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | e = EPANN(agent_class=PendulumAgent)
101 | 
102 | e.loadNetworkFromFile(
103 | '/home/declan/Documents/code/evo1/misc_runs/evolve_22-01-2019_18-01-04__PendulumAgent' +
104 | '/' + 'bestNN_PendulumAgent_22-01-2019_18-01-04' + '.json'
105 | )
106 | 
107 | exit(0)
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | #
116 | 


--------------------------------------------------------------------------------
/scrap_old_test/LunarLanderAgent.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | import gym
 4 | from gym import wrappers
 5 | import FileSystemTools as fst
 6 | 
 7 | '''
 8 | 
 9 | need to provide:
10 | 
11 | --state labels (for each state var)
12 | --action labels (for each action var)
13 | --N_state_terms
14 | --N_actions
15 | 
16 | functions:
17 | 
18 | --getStateVec()
19 | --initEpisode()
20 | --iterate() (returns a tuple of (reward, state, boolean isDone))
21 | 
22 | '''
23 | 
24 | 
25 | 
26 | class LunarLanderAgent:
27 | 
28 | 
29 |     def __init__(self, **kwargs):
30 | 
31 |         self.env = gym.make('LunarLander-v2')
32 |         gym.logger.set_level(40)
33 |         self.state_labels = ['pos_x', 'pos_y', 'v_x', 'v_y', 'angle', 'v_ang']
34 |         self.action_labels = ['nothing', 'engine_L', 'engine_main', 'engine_R',]
35 |         # Last two states are whether the legs are touching the ground or not.
36 |         # I'm not including them here.
37 |         self.N_state_terms = 6
38 |         self.N_actions = self.env.action_space.n
39 |         self.action_space_type = 'discrete'
40 |         self.state = self.env.reset()
41 |         dt = fst.getDateString()
42 |         self.base_name = f'LunarLander_{dt}'
43 |         self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/')
44 |         self.monitor_is_on = False
45 | 
46 | 
47 | 
48 | 
49 |     def setMonitorOn(self):
50 |         # It seems like when I call this, it gives a warning about the env not being
51 |         # made with gym.make (which it is...), but if I call it only once for the same
52 |         # agent, it doesn't run it every time I call it?
53 |         #if not self.monitor_is_on:
54 |         #
55 |         # Also, it seems like you can't record the episode without showing it on the screen.
56 |         # See https://github.com/openai/gym/issues/347 maybe?
57 |         if True:
58 |             self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name)
59 |             self.env = wrappers.Monitor(self.env, self.record_dir)
60 |             self.monitor_is_on = True
61 | 
62 | 
63 |     def getStateVec(self):
64 |         return(self.state[:self.N_state_terms])
65 | 
66 | 
67 |     def initEpisode(self):
68 |         self.state = self.env.reset()
69 | 
70 | 
71 |     def iterate(self, action):
72 |         # Action 0 is go L, action 1 is go R.
73 |         observation, reward, done, info = self.env.step(action)
74 |         self.state = observation
75 | 
76 |         return(reward, self.state, done)
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 |     def drawState(self):
84 | 
85 |         self.env.render()
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 
93 | #
94 | 


--------------------------------------------------------------------------------
/scrap_old_test/CartPoleAgent.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import numpy as np
  3 | import gym
  4 | from gym import wrappers
  5 | import FileSystemTools as fst
  6 | 
  7 | '''
  8 | 
  9 | need to provide:
 10 | 
 11 | --state labels (for each state var)
 12 | --action labels (for each action var)
 13 | --N_state_terms
 14 | --N_actions
 15 | --action_space_type
 16 | 
 17 | functions:
 18 | 
 19 | --getStateVec()
 20 | --initEpisode()
 21 | --iterate() (returns a tuple of (reward, state, boolean isDone))
 22 | 
 23 | '''
 24 | 
 25 | 
 26 | 
 27 | class CartPoleAgent:
 28 | 
 29 | 
 30 |     def __init__(self, **kwargs):
 31 | 
 32 |         self.env = gym.make('CartPole-v0')
 33 |         gym.logger.set_level(40)
 34 |         self.state_labels = ['pos_cart', 'v_cart','pole_angle', 'v_poletip']
 35 |         self.action_labels = ['cart_L', 'cart_R',]
 36 |         # Last two states are whether the legs are touching the ground or not.
 37 |         # I'm not including them here.
 38 |         self.N_state_terms = len(self.env.reset())
 39 |         self.N_actions = self.env.action_space.n
 40 |         self.action_space_type = 'discrete'
 41 |         self.state = self.env.reset()
 42 |         dt = fst.getDateString()
 43 |         self.base_name = f'CartPole_{dt}'
 44 |         self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/')
 45 |         self.monitor_is_on = False
 46 | 
 47 | 
 48 |     def setMaxEpisodeSteps(self, N_steps):
 49 | 
 50 |         self.env._max_episode_steps = N_steps
 51 |         self.env.spec.max_episode_steps = N_steps
 52 |         self.env.spec.timestep_limit = N_steps
 53 | 
 54 | 
 55 |     def setMonitorOn(self):
 56 |         # It seems like when I call this, it gives a warning about the env not being
 57 |         # made with gym.make (which it is...), but if I call it only once for the same
 58 |         # agent, it doesn't run it every time I call it?
 59 |         #if not self.monitor_is_on:
 60 |         #
 61 |         # Also, it seems like you can't record the episode without showing it on the screen.
 62 |         # See https://github.com/openai/gym/issues/347 maybe?
 63 |         if True:
 64 |             self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name)
 65 |             self.env = wrappers.Monitor(self.env, self.record_dir)
 66 |             self.monitor_is_on = True
 67 | 
 68 | 
 69 |     def getStateVec(self):
 70 |         return(self.state[:self.N_state_terms])
 71 | 
 72 | 
 73 |     def initEpisode(self):
 74 |         self.state = self.env.reset()
 75 | 
 76 | 
 77 |     def iterate(self, action):
 78 |         # Action 0 is go L, action 1 is go R.
 79 |         observation, reward, done, info = self.env.step(action)
 80 |         self.state = observation
 81 | 
 82 |         return(reward, self.state, done)
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 |     def drawState(self):
 90 | 
 91 |         self.env.render()
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | #
100 | 


--------------------------------------------------------------------------------
/scrap_old_test/PendulumAgent.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import numpy as np
  3 | import gym
  4 | from gym import wrappers
  5 | import FileSystemTools as fst
  6 | 
  7 | '''
  8 | 
  9 | need to provide:
 10 | 
 11 | --state labels (for each state var)
 12 | --action labels (for each action var)
 13 | --N_state_terms
 14 | --N_actions
 15 | 
 16 | functions:
 17 | 
 18 | --getStateVec()
 19 | --initEpisode()
 20 | --iterate() (returns a tuple of (reward, state, boolean isDone))
 21 | --setMaxEpisodeSteps()
 22 | 
 23 | '''
 24 | 
 25 | 
 26 | 
 27 | class PendulumAgent:
 28 | 
 29 | 
 30 |     def __init__(self, **kwargs):
 31 | 
 32 |         self.env = gym.make('Pendulum-v0')
 33 |         gym.logger.set_level(40)
 34 |         self.state_labels = ['cos(ang)', 'sin(ang)', 'ang_vel']
 35 |         self.action_labels = ['torque']
 36 |         # Last two states are whether the legs are touching the ground or not.
 37 |         # I'm not including them here.
 38 |         self.N_state_terms = len(self.env.reset())
 39 |         self.N_actions = 1
 40 |         self.action_space_type = 'continuous'
 41 |         self.state = self.env.reset()
 42 |         dt = fst.getDateString()
 43 |         self.base_name = f'Pendulum_{dt}'
 44 |         self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/')
 45 |         self.monitor_is_on = False
 46 | 
 47 | 
 48 | 
 49 |     def setMaxEpisodeSteps(self, N_steps):
 50 | 
 51 |         self.env._max_episode_steps = N_steps
 52 |         self.env.spec.max_episode_steps = N_steps
 53 |         self.env.spec.timestep_limit = N_steps
 54 | 
 55 | 
 56 |     def closeEnv(self):
 57 |         # This doesn't seem to be a good idea to use with monitor?
 58 |         self.env.close()
 59 | 
 60 | 
 61 |     def setMonitorOn(self, show_run=True):
 62 |         # It seems like when I call this, it gives a warning about the env not being
 63 |         # made with gym.make (which it is...), but if I call it only once for the same
 64 |         # agent, it doesn't run it every time I call it?
 65 |         #if not self.monitor_is_on:
 66 |         #
 67 |         # Also, it seems like you can't record the episode without showing it on the screen.
 68 |         # See https://github.com/openai/gym/issues/347 maybe?
 69 |         if True:
 70 |             self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name)
 71 |             if show_run:
 72 |                 self.env = wrappers.Monitor(self.env, self.record_dir)
 73 |             else:
 74 |                 self.env = wrappers.Monitor(self.env, self.record_dir, video_callable=False, force=True)
 75 |             self.monitor_is_on = True
 76 | 
 77 | 
 78 |     def getStateVec(self):
 79 |         return(self.state[:self.N_state_terms])
 80 | 
 81 | 
 82 |     def initEpisode(self):
 83 |         self.state = self.env.reset()
 84 | 
 85 | 
 86 |     def iterate(self, action):
 87 |         # Action 0 is go L, action 1 is go R.
 88 |         observation, reward, done, info = self.env.step(action)
 89 |         self.state = observation
 90 | 
 91 |         return(reward, self.state, done)
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 |     def drawState(self):
 99 | 
100 |         self.env.render()
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | #
109 | 


--------------------------------------------------------------------------------
/ablation_test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('./classes')
 3 | from EPANN import EPANN
 4 | from GymAgent import GymAgent
 5 | import matplotlib.pyplot as plt
 6 | import FileSystemTools as fst
 7 | import numpy as np
 8 | 
 9 | params = {}
10 | params['env_name'] = 'Pendulum'
11 | 
12 | e = EPANN(agent_class=GymAgent, env_name=params['env_name'])
13 | 
14 | path = '/home/declan/Documents/code/evo1/save_runs/evolve_23-01-2019_18-45-18__GymAgentPendulum_good'
15 | params['NN_file'] = fst.combineDirAndFile(path, 'bestNN_GymAgent_23-01-2019_18-45-18' + '.json')
16 | 
17 | e.loadNetworkFromFile(params['NN_file'])
18 | 
19 | datetime_str = fst.getDateString()
20 | dir = fst.combineDirAndFile('misc_runs', 'ablation_{}_{}'.format(datetime_str, params['env_name']))
21 | fst.makeDir(dir)
22 | plot_dir = fst.makeDir(fst.combineDirAndFile(dir, 'plots'))
23 | 
24 | log_output_str = ''
25 | 
26 | params['N_runs_per_NN'] = 50
27 | params['N_episode_steps'] = e.agent.max_episode_steps
28 | 
29 | params['N_weights_to_remove'] = len(e.weights_list)
30 | 
31 | ablation_FF_mean_std = []
32 | 
33 | for w_removed in range(params['N_weights_to_remove']):
34 | 
35 |     # Run the ablation for several times to get stats
36 |     ablation_scores = []
37 |     for run in range(params['N_runs_per_NN']):
38 |         ablation_scores.append(e.runEpisode(params['N_episode_steps']))
39 | 
40 |     # Add the mean and std
41 |     ablation_FF_mean_std.append([w_removed, np.mean(ablation_scores), np.std(ablation_scores)])
42 | 
43 |     # Save what the NN currently looks like
44 |     NN_save_fname = fst.combineDirAndFile(plot_dir, 'NN_plot_{}w_removed.png'.format(w_removed))
45 |     e.plotNetwork(show_plot=False, save_plot=True, fname=NN_save_fname, node_legend=True)
46 | 
47 |     # Remove the next smallest weight
48 |     smallest_weight_connection = min(e.weights_dict, key=lambda x: abs(e.weights_dict.get(x)))
49 |     remove_str = 'Removing weight {} that has value {:.3f}\n'.format(smallest_weight_connection, e.weights_dict[smallest_weight_connection])
50 |     print(remove_str)
51 |     log_output_str += remove_str
52 |     e.removeConnectingWeight(smallest_weight_connection)
53 | 
54 | 
55 | 
56 | # Save params
57 | fst.writeDictToFile(params, fst.combineDirAndFile(dir, 'Params_logfile_{}.log'.format(datetime_str)))
58 | 
59 | # Save weight order removal
60 | removal_log_fname = fst.combineDirAndFile(dir, 'Weight_remove_order_{}.txt'.format(datetime_str))
61 | with open(removal_log_fname, 'w+') as f:
62 |     f.write(log_output_str)
63 | 
64 | # Plot the mean and std FF as a function of removing weights
65 | ablation_FF_mean_std = np.array(ablation_FF_mean_std)
66 | weights_removed = ablation_FF_mean_std[:, 0]
67 | FF_mean = ablation_FF_mean_std[:, 1]
68 | FF_std = ablation_FF_mean_std[:, 2]
69 | plt.fill_between(
70 | np.array(range(len(FF_mean))),
71 | FF_mean - FF_std,
72 | FF_mean + FF_std,
73 | facecolor='dodgerblue', alpha=0.5)
74 | 
75 | plt.plot(FF_mean, color='mediumblue')
76 | plt.xlabel('# weights removed')
77 | plt.ylabel('FF')
78 | plt.title('Ablation test, FF over {} episodes each'.format(params['N_runs_per_NN']))
79 | fname = fst.combineDirAndFile(dir, '{}_{}.png'.format('ablation_FF_mean-std_plot', datetime_str))
80 | plt.savefig(fname)
81 | 
82 | # Save mean/std
83 | fname = fst.combineDirAndFile(dir, '{}_{}.txt'.format('ablation_FF_mean-std', datetime_str))
84 | np.savetxt(fname, ablation_FF_mean_std, fmt='%.4f')
85 | 
86 | 
87 | 
88 | 
89 | #
90 | 


--------------------------------------------------------------------------------
/movie_combine.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('./classes')
  3 | from moviepy.editor import VideoFileClip, clips_array, vfx
  4 | import argparse
  5 | import FileSystemTools as fst
  6 | import glob
  7 | import subprocess
  8 | import os
  9 | 
 10 | 
 11 | def combineMovieFiles(**kwargs):
 12 | 
 13 |     path = kwargs.get('path', None)
 14 |     file_type = kwargs.get('file_type', 'mp4')
 15 |     grid_size = kwargs.get('grid_size', '1x1')
 16 |     make_gif = kwargs.get('make_gif', True)
 17 | 
 18 |     # get the files with the video clip extension type
 19 |     file_list = glob.glob(fst.addTrailingSlashIfNeeded(path) + '*' + file_type)
 20 |     print('{} files of type {} found'.format(len(file_list), file_type))
 21 | 
 22 |     # make sure you've passed a grid size argument
 23 |     assert grid_size != '0', 'need to provide a grid_size arg of form <height int>x<width int>'
 24 | 
 25 |     try:
 26 |         grid_dims = [int(y) for y in grid_size.split('x')]
 27 |         grid_height, grid_width = grid_dims[0], grid_dims[1]
 28 |         N_movie_panels = grid_height*grid_width
 29 |         print('need {} movie files for a grid of size {}'.format(N_movie_panels, grid_size))
 30 |     except:
 31 |         print('something wrong with grid_size argument, should be of form 5x8 (or similar)')
 32 |         exit()
 33 | 
 34 |     # take only the first N video files, no choosing process. It will use ones created
 35 |     # from running this program previously if they're there, so be careful.
 36 |     files_used = file_list[:N_movie_panels]
 37 | 
 38 |     clip_list = []
 39 |     clip_matrix = []
 40 | 
 41 |     # create a list of the video file clip objects, with a small margin around each
 42 |     for f in files_used:
 43 |         clip1 = VideoFileClip(f).margin(10)
 44 |         #clip1 = clip1.resize(0.50)
 45 |         clip_list.append(clip1)
 46 | 
 47 |     # put them into a list of lists, ie, a matrix, in the shape you want them to finally be
 48 |     for y in range(grid_height):
 49 |         temp_list = []
 50 |         for x in range(grid_width):
 51 |             temp_list.append(clip_list[y*grid_width + x])
 52 | 
 53 |         clip_matrix.append(temp_list)
 54 | 
 55 |     print('size of clip_matrix:', len(clip_matrix), len(clip_matrix[0]))
 56 |     final_clip = clips_array(clip_matrix) # put the clips side by side
 57 | 
 58 |     # fname stuff
 59 |     dt_string = fst.getDateString()
 60 |     base_fname = 'COMBINED_{}_{}'.format(grid_size, dt_string)
 61 |     movie_output_fname = fst.combineDirAndFile(path, '{}.{}'.format(base_fname, file_type))
 62 | 
 63 |     final_clip.write_videofile(movie_output_fname) # create the combined video file!
 64 | 
 65 |     if make_gif:
 66 | 
 67 |         px_size = 1260
 68 |         fps = 30
 69 | 
 70 |         gif_output_fname = fst.combineDirAndFile(path, '{}.gif'.format(base_fname))
 71 | 
 72 |         palette_fname = 'palette.png'
 73 | 
 74 |         create_palette_cmd = 'ffmpeg -y  -i {} -vf fps={},scale={}:-1:flags=lanczos,palettegen {}'.format(movie_output_fname, fps, px_size, palette_fname)
 75 |         create_gif_cmd = 'ffmpeg -i {} -i {} -filter_complex "fps={},scale={}:-1:flags=lanczos[x];[x][1:v]paletteuse" {}'.format(movie_output_fname, palette_fname, fps, px_size, gif_output_fname)
 76 | 
 77 |         os.system(create_palette_cmd)
 78 |         os.system(create_gif_cmd)
 79 | 
 80 |         remove_palette_cmd = f'rm {palette_fname}'
 81 |         remove_movie_cmd = f'rm {movie_output_fname}'
 82 | 
 83 |         os.system(remove_palette_cmd)
 84 |         os.system(remove_movie_cmd)
 85 | 
 86 | 
 87 | if __name__ == '__main__':
 88 | 
 89 |     # arguments to be read in via CLI
 90 |     parser = argparse.ArgumentParser()
 91 |     parser.add_argument('path')
 92 |     parser.add_argument('--grid_size', default='0')
 93 |     parser.add_argument('--file_type', default='mp4')
 94 |     parser.add_argument('--gif', action='store_true', default=False)
 95 |     args = parser.parse_args()
 96 | 
 97 |     kwargs = {}
 98 |     kwargs['path'] = args.path
 99 |     kwargs['file_type'] = args.file_type
100 |     kwargs['grid_size'] = args.grid_size
101 |     kwargs['make_gif'] = args.gif
102 | 
103 |     combineMovieFiles(**kwargs)
104 | 
105 | 
106 | 
107 | 
108 | 
109 | #
110 | 


--------------------------------------------------------------------------------
/classes/GymAgent.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('./classes')
  3 | import numpy as np
  4 | import gym
  5 | from gym import wrappers
  6 | import FileSystemTools as fst
  7 | import json
  8 | 
  9 | 
 10 | '''
 11 | 
 12 | This is a generalized agent for OpenAI gym environments.
 13 | I'm doing it because I had to create diff. agents for each environment,
 14 | when they really just need a few specific things for each. So now you just pass it
 15 | env_name in the kwargs, and it will look up in a json file the right stuff.
 16 | 
 17 | Here, the env_name you pass it will be something like 'Pendulum', not Pendulum-v0,
 18 | because I don't want to have to deal with remembering versions.
 19 | 
 20 | See createEnvJson.py and loadEnvJson() for details.
 21 | 
 22 | need to provide:
 23 | 
 24 | --state labels (for each state var)
 25 | --action labels (for each action var)
 26 | --gym_env_name
 27 | --action_space_type
 28 | 
 29 | functions:
 30 | 
 31 | --getStateVec()
 32 | --initEpisode()
 33 | --iterate() (returns a tuple of (reward, state, boolean isDone))
 34 | --setMaxEpisodeSteps()
 35 | 
 36 | '''
 37 | 
 38 | 
 39 | 
 40 | class GymAgent:
 41 | 
 42 | 
 43 |     def __init__(self, **kwargs):
 44 | 
 45 |         self.env_name = kwargs.get('env_name', None)
 46 |         assert self.env_name is not None, 'Need to provide an env_name argument!'
 47 | 
 48 |         # Load all the properties for this env.
 49 |         self.loadEnvJson(self.env_name)
 50 |         # Create the env
 51 |         self.env = gym.make(self.gym_env_name)
 52 |         self.setMaxEpisodeSteps(self.max_episode_steps)
 53 |         gym.logger.set_level(40)
 54 | 
 55 |         self.state = self.env.reset() # Should I be doing this here? sometimes trouble with resetting when done=False
 56 |         dt = fst.getDateString()
 57 |         self.base_name = f'{self.env_name}_{dt}'
 58 |         self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/')
 59 |         self.monitor_is_on = False
 60 | 
 61 | 
 62 |     def setMaxEpisodeSteps(self, N_steps):
 63 | 
 64 |         self.env._max_episode_steps = N_steps
 65 |         self.env.spec.max_episode_steps = N_steps
 66 |         self.env.spec.timestep_limit = N_steps
 67 | 
 68 | 
 69 |     def closeEnv(self):
 70 |         # This doesn't seem to be a good idea to use with monitor?
 71 |         self.env.close()
 72 |         #self.env.render(close=True)
 73 | 
 74 | 
 75 |     def setMonitorOn(self, show_run=True):
 76 |         # It seems like when I call this, it gives a warning about the env not being
 77 |         # made with gym.make (which it is...), but if I call it only once for the same
 78 |         # agent, it doesn't run it every time I call it?
 79 |         #if not self.monitor_is_on:
 80 |         #
 81 |         # Also, it seems like you can't record the episode without showing it on the screen.
 82 |         # See https://github.com/openai/gym/issues/347 maybe?
 83 | 
 84 |         self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name)
 85 |         #if show_run:
 86 |         if True:
 87 |             self.env = wrappers.Monitor(self.env, self.record_dir)
 88 |         else:
 89 |             self.env = wrappers.Monitor(self.env, self.record_dir, video_callable=False, force=True)
 90 |         self.monitor_is_on = True
 91 | 
 92 | 
 93 |     def getStateVec(self):
 94 |         return(self.state[:self.N_state_terms])
 95 | 
 96 | 
 97 |     def initEpisode(self):
 98 |         self.state = self.env.reset()
 99 | 
100 | 
101 |     def iterate(self, action):
102 |         # Action 0 is go L, action 1 is go R.
103 |         observation, reward, done, info = self.env.step(action)
104 |         self.state = observation
105 | 
106 |         return(reward, self.state, done)
107 | 
108 | 
109 | 
110 |     def drawState(self):
111 |         self.env.render()
112 | 
113 | 
114 |     def loadEnvJson(self, env_name):
115 | 
116 |         with open('gym_env_info.json') as json_file:
117 |             env_info_dict = json.load(json_file)
118 | 
119 |         env_info = env_info_dict[env_name]
120 | 
121 |         self.gym_env_name = env_info['gym_env_name']
122 |         self.state_labels = env_info['state_labels']
123 |         self.action_labels = env_info['action_labels']
124 |         self.action_space_type = env_info['action_space_type']
125 |         self.max_episode_steps = env_info['max_episode_steps']
126 |         self.N_state_terms = len(self.state_labels)
127 |         self.N_actions = len(self.action_labels)
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | #
136 | 


--------------------------------------------------------------------------------
/classes/Node.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('./classes')
  3 | import numpy as np
  4 | from math import exp, tanh
  5 | from copy import copy
  6 | 
  7 | 
  8 | class Node:
  9 | 
 10 |     def __init__(self, node_index):
 11 | 
 12 |         self.is_input_node = False
 13 |         self.is_output_node = False
 14 |         self.is_bias_node = False
 15 |         self.is_memory_node = False
 16 | 
 17 |         self.node_index = node_index
 18 | 
 19 |         self.input_indices = []
 20 | 
 21 |         self.inputs_received = []
 22 | 
 23 |         self.output_weights = {}
 24 | 
 25 |         self.value = None
 26 | 
 27 | 
 28 |     def setToInputNode(self):
 29 |         self.is_input_node = True
 30 | 
 31 | 
 32 |     def setToOutputNode(self):
 33 |         self.is_output_node = True
 34 | 
 35 | 
 36 |     def setToBiasNode(self):
 37 |         self.is_bias_node = True
 38 |         self.value = 1.0
 39 | 
 40 | 
 41 |     def setToMemoryNode(self):
 42 |         self.is_memory_node = True
 43 |         self.value = 0.0
 44 | 
 45 | 
 46 |     def getValue(self):
 47 | 
 48 |         if self.value is not None:
 49 |             return(self.value)
 50 |         else:
 51 |             if self.is_output_node:
 52 |                 tot = sum(self.inputs_received)
 53 |                 self.value = tot
 54 |                 return(self.value)
 55 |             elif self.is_bias_node:
 56 |                 pass
 57 |             elif self.is_memory_node:
 58 |                 pass
 59 |             elif self.is_input_node:
 60 |                 # For now, I'm just gonna set the input nodes directly via the .output value.
 61 |                 return(self.value)
 62 |             else:
 63 |                 tot = sum(self.inputs_received)
 64 |                 self.value = self.nonlinear(tot)
 65 |                 return(self.value)
 66 | 
 67 | 
 68 | 
 69 |     def calculateNodeValue(self):
 70 |         if self.is_output_node:
 71 |             tot = sum(self.inputs_received.values())
 72 |             self.value = tot
 73 |         elif self.is_bias_node:
 74 |             pass
 75 |         elif self.is_memory_node:
 76 |             pass
 77 |         elif self.is_input_node:
 78 |             # For now, I'm just gonna set the input nodes directly via the .output value.
 79 |             pass
 80 |         else:
 81 |             tot = sum(self.inputs_received.values())
 82 |             self.value = self.nonlinear(tot)
 83 | 
 84 | 
 85 |     def clearInputs(self):
 86 |         if not self.is_input_node:
 87 |             self.inputs_received = []
 88 | 
 89 | 
 90 |     def clearNode(self):
 91 |         self.clearInputs()
 92 |         self.value = None
 93 | 
 94 | 
 95 |     def setRandomOutputWeights(self):
 96 |         weights = np.random.normal(size=self.getNOutputs(), scale=0.1)
 97 |         self.output_weights = dict(zip(self.getOutputIndices(), weights))
 98 | 
 99 | 
100 |     def removeFromInputIndices(self, ind):
101 |         self.input_indices.remove(ind)
102 | 
103 |     def removeFromOutputWeights(self, ind):
104 |         del self.output_weights[ind]
105 | 
106 |     def addToInputIndices(self, ind):
107 |         self.input_indices.append(ind)
108 | 
109 | 
110 |     def changeOutputWeightInd(self, old_ind, new_ind):
111 |         weight = self.output_weights.pop(old_ind)
112 |         self.output_weights[new_ind] = weight
113 | 
114 |     def addToOutputWeights(self, new_output_ind, val=None, std=0.1):
115 |         if val is not None:
116 |             self.output_weights[new_output_ind] = val
117 |         else:
118 |             self.output_weights[new_output_ind] = np.random.normal(scale=std)
119 | 
120 | 
121 |     def mutateOutputWeight(self, ind, std=0.1):
122 |         self.output_weights[ind] += np.random.normal(scale=std)
123 | 
124 | 
125 |     def getOutputIndices(self):
126 |         return(list(self.output_weights.keys()))
127 | 
128 | 
129 |     def getNInputs(self):
130 |         return(len(self.input_indices))
131 | 
132 |     def getNOutputs(self):
133 |         return(len(self.output_weights))
134 | 
135 | 
136 |     def getOutputWeightStr(self):
137 |         w_str = ', '.join(['{}: {:.3f}'.format(k,v) for k,v in self.output_weights.items()])
138 |         s = '[{}]'.format(w_str)
139 |         return(s)
140 | 
141 |     def setOutputIndices(self, ind_list):
142 |         self.output_weights = dict(zip(copy(ind_list), [0]*len(ind_list)))
143 | 
144 | 
145 |     def setInputIndices(self, ind_list):
146 |         self.input_indices = copy(ind_list)
147 |         self.clearInputs()
148 | 
149 | 
150 |     def allInputsReceived(self):
151 | 
152 |         #if self.input_indices is None:
153 |         if len(self.input_indices) == 0:
154 |             return(True)
155 | 
156 |         # checks if there are any None's left in the list. If there aren't, it has all inputs
157 |         # and is ready to proceed.
158 |         if list(self.inputs_received.values()).count(None)==0:
159 |             return(True)
160 |         else:
161 |             return(False)
162 | 
163 | 
164 | 
165 | 
166 |     def addToInputsReceived(self, val):
167 |         self.inputs_received.append(val)
168 | 
169 | 
170 |     def nonlinear(self, x):
171 | 
172 |         # Let's start with a nice simple sigmoid.
173 | 
174 |         #sigmoid = 1/(1 + exp(-x))
175 |         #relu = max(0, x)
176 |         tanh_x = tanh(x)
177 | 
178 |         return(tanh_x)
179 | 
180 | 
181 | #
182 | 


--------------------------------------------------------------------------------
/scrap_old_test/PuckworldAgent.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import numpy as np
  3 | from math import sqrt
  4 | 
  5 | 
  6 | 
  7 | class PuckworldAgent:
  8 | 
  9 | 
 10 |     def __init__(self, **kwargs):
 11 | 
 12 |         self.xlims = kwargs.get('xlims', np.array([-0.5,0.5]))
 13 |         self.ylims = kwargs.get('ylims', np.array([-0.5,0.5]))
 14 |         self.lims = np.array((self.xlims,self.ylims))
 15 |         self.max_dist = sqrt(np.ptp(self.xlims)**2 + np.ptp(self.ylims)**2)
 16 |         self.a = kwargs.get('a',1.0)
 17 |         self.drag = kwargs.get('drag', 0.5)
 18 |         self.time_step = kwargs.get('dt',10**-1)
 19 |         self.reward_type = kwargs.get('reward','sparse')
 20 | 
 21 |         self.passed_params = {}
 22 |         check_params = ['a', 'drag', 'dt', 'reward']
 23 |         for param in check_params:
 24 |             if kwargs.get(param, None) is not None:
 25 |                 self.passed_params[param] = kwargs.get(param, None)
 26 | 
 27 |         self.N_actions = 4
 28 | 
 29 | 
 30 |         self.circ_rad = np.ptp(self.xlims)/20.0
 31 |         self.target_rad = 1*self.circ_rad
 32 |         self.resetTarget()
 33 | 
 34 |         self.pos0 = np.array([self.xlims.mean()/2.0,self.ylims.mean()/2.0])
 35 |         self.v0 = np.array([0.0,0.0])
 36 |         self.resetStateValues()
 37 |         self.accel_array = np.array([[0,1],[0,-1],[-1,0],[1,0]])
 38 | 
 39 | 
 40 | 
 41 |         self.N_state_terms = len(self.getStateVec())
 42 | 
 43 | 
 44 | 
 45 |     def puckTargetDist(self):
 46 |         return(sqrt(np.sum((self.pos-self.target)**2)))
 47 | 
 48 | 
 49 |     def addToHist(self):
 50 |         self.pos_hist = np.concatenate((self.pos_hist,[self.pos]))
 51 |         self.v_hist = np.concatenate((self.v_hist,[self.v]))
 52 |         self.t.append(self.t[-1] + self.time_step)
 53 |         self.r_hist.append(self.reward())
 54 | 
 55 | 
 56 |     def resetTarget(self):
 57 | 
 58 |         self.target = self.target_rad + self.lims[:,0] + np.random.random((2,))*(np.ptp(self.lims,axis=1)-2*self.target_rad)
 59 | 
 60 | 
 61 |     def iterateEuler(self,action):
 62 | 
 63 |         #this uses the Euler-Cromer method to move.
 64 | 
 65 |         #Right now I'm just gonna make it sit against a wall if it goes to the
 66 |         #boundary, but it might be cool to make periodic bry conds, to see if it would
 67 |         #learn to zoom around it.
 68 | 
 69 |         a = self.actionToAccel(action) - self.drag*self.v
 70 | 
 71 |         v_next = self.v + a*self.time_step
 72 |         pos_next = self.pos + v_next*self.time_step
 73 | 
 74 |         #To handle the walls
 75 |         for i in [0,1]:
 76 |             if pos_next[i] < (self.lims[i,0] + self.circ_rad):
 77 |                 pos_next[i] = self.lims[i,0] + self.circ_rad
 78 |                 # This makes it "bounce" off the wall, so it keeps momentum.
 79 |                 #v_next[i] = -v_next[i]
 80 |                 # This makes it "stick" to the wall.
 81 |                 v_next[i] = 0
 82 | 
 83 |             if pos_next[i] > (self.lims[i,1] - self.circ_rad):
 84 |                 pos_next[i] = self.lims[i,1] - self.circ_rad
 85 |                 #v_next[i] = -v_next[i]
 86 |                 v_next[i] = 0
 87 | 
 88 |         self.pos = pos_next
 89 |         self.v = v_next
 90 |         self.addToHist()
 91 | 
 92 | 
 93 |     def actionToAccel(self,action):
 94 |         self.a_hist.append(action)
 95 |         return(self.a*self.accel_array[action])
 96 | 
 97 | 
 98 | 
 99 |     ###################### Required agent functions
100 | 
101 | 
102 |     def getPassedParams(self):
103 |         #This returns a dict of params that were passed to the agent, that apply to the agent.
104 |         #So if you pass it a param for 'reward', it will return that, but it won't return the
105 |         #default val if you didn't pass it.
106 |         return(self.passed_params)
107 | 
108 | 
109 |     def getStateVec(self):
110 |         assert self.target is not None, 'Need target to get state vec'
111 |         return(np.concatenate((self.pos,self.v,self.target)))
112 | 
113 | 
114 |     def reward(self):
115 | 
116 |         assert self.target is not None, 'Need a target'
117 | 
118 |         max_R = 1
119 | 
120 |         if self.reward_type == 'sparse':
121 |             if self.puckTargetDist() <= (self.target_rad + self.circ_rad):
122 |                 return(max_R)
123 |             else:
124 |                 return(-0.01)
125 | 
126 |         if self.reward_type == 'shaped':
127 |             #return(max_R*(self.max_dist/2.0 - self.puckTargetDist()))
128 |             #These numbers will probably have to change if a, dt, or the dimensions change.
129 |             return(-0.5*self.puckTargetDist() + 0.4)
130 | 
131 | 
132 |     def initEpisode(self):
133 |         self.resetStateValues()
134 |         self.resetTarget()
135 | 
136 | 
137 |     def iterate(self,action):
138 |         self.iterateEuler(action)
139 | 
140 |         r = self.reward()
141 |         if r > 0:
142 |             self.resetTarget()
143 | 
144 |         return(r, self.getStateVec(), False)
145 | 
146 | 
147 |     def resetStateValues(self):
148 | 
149 |         self.pos = self.pos0
150 |         self.v = self.v0
151 | 
152 |         self.pos_hist = np.array([self.pos])
153 |         self.v_hist = np.array([self.v])
154 |         self.t = [0]
155 |         self.a_hist = [0]
156 |         self.r_hist = []
157 | 
158 | 
159 |     def drawState(self,ax):
160 | 
161 |         ax.clear()
162 |         ax.set_xlim(tuple(self.xlims))
163 |         ax.set_ylim(tuple(self.ylims))
164 | 
165 |         ax.set_xlabel('x')
166 |         ax.set_ylabel('y')
167 |         ax.set_aspect('equal')
168 | 
169 |         puck = plt.Circle(tuple(self.pos), self.circ_rad, color='tomato')
170 |         ax.add_artist(puck)
171 | 
172 |         if self.target is not None:
173 |             target = plt.Circle(tuple(self.target), self.target_rad, color='seagreen')
174 |             ax.add_artist(target)
175 | 
176 | 
177 |     def plotStateParams(self,axes):
178 | 
179 |         ax1 = axes[0]
180 |         ax2 = axes[1]
181 |         ax3 = axes[2]
182 |         ax4 = axes[3]
183 | 
184 |         ax1.clear()
185 |         ax1.plot(self.pos_hist[:,0][-1000:],label='x')
186 |         ax1.plot(self.pos_hist[:,1][-1000:],label='y')
187 |         ax1.legend()
188 | 
189 |         ax2.clear()
190 |         ax2.plot(self.a_hist[-1000:],label='a')
191 |         ax2.set_yticks([0,1,2,3])
192 |         ax2.set_yticklabels(['U','D','L','R'])
193 |         ax2.legend()
194 | 
195 | 
196 |         ax3.clear()
197 |         ax3.plot(self.r_hist[-1000:],label='R')
198 |         ax3.legend()
199 | 
200 | 
201 |         ax4.clear()
202 |         ax4.plot(self.v_hist[:,0][-1000:],label='vx')
203 |         ax4.plot(self.v_hist[:,1][-1000:],label='vy')
204 |         ax4.legend()
205 | 
206 | 
207 | 
208 | 
209 | #
210 | 


--------------------------------------------------------------------------------
/classes/FileSystemTools.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from os import mkdir
  3 | import os
  4 | from copy import copy,deepcopy
  5 | import time
  6 | import glob
  7 | import subprocess
  8 | 
  9 | def getDateString():
 10 | 	return(datetime.now().strftime('%d-%m-%Y_%H-%M-%S'))
 11 | 
 12 | 
 13 | def makeDir(dir_name):
 14 | 	# Even if this is in a library dir, it should make the dir
 15 | 	# in the script that called it.
 16 | 	mkdir(dir_name)
 17 | 	return(dir_name)
 18 | 
 19 | 
 20 | def makeDateDir(base_dir='.'):
 21 | 	# Just creates a dir with the current date for its name
 22 | 	ds = getDateString()
 23 | 	full_dir = combineDirAndFile(base_dir, ds)
 24 | 	makeDir(full_dir)
 25 | 	return(full_dir)
 26 | 
 27 | 
 28 | def makeLabelDateDir(label, base_dir='.'):
 29 | 	# You give it a label, and it creates the dir label_datestring
 30 | 	dir_name = label + '_' + getDateString()
 31 | 	full_dir = combineDirAndFile(base_dir, dir_name)
 32 | 	makeDir(full_dir)
 33 | 	return(full_dir)
 34 | 
 35 | 
 36 | def combineDirAndFile(dir, file):
 37 | 	# Adds the file to the end of dir, adding a slash in between if needed.
 38 | 	return(addTrailingSlashIfNeeded(dir) + file)
 39 | 
 40 | 
 41 | def dictPrettyPrint(in_dict):
 42 | 
 43 | 	# Formats a dict into a nice string with each k,v entry on a new line,
 44 | 	# and prints it.
 45 | 	dict_str = '{\n'
 46 | 
 47 | 	for k,v in in_dict.items():
 48 | 		dict_str += '\t{} : {}\n'.format(k, v)
 49 | 
 50 | 	dict_str += '\n}\n'
 51 | 	print(dict_str)
 52 | 
 53 | 
 54 | 
 55 | 
 56 | def dictToStringList(dict):
 57 | 	pd_copy = copy(dict)
 58 | 	for k,v in pd_copy.items():
 59 | 		if type(v).__name__ == 'float':
 60 | 			if abs(v)>10**-4:
 61 | 				pd_copy[k] = '{:.5f}'.format(v)
 62 | 			else:
 63 | 				pd_copy[k] = '{:.2E}'.format(v)
 64 | 
 65 | 	params = [str(k)+'='+str(v) for k,v in pd_copy.items() if v is not None]
 66 | 	return(params)
 67 | 
 68 | 
 69 | 
 70 | def paramDictToFnameStr(param_dict):
 71 | 	# Creates a string that can be used as an fname, separated by
 72 | 	# underscores. If a param has the value None, it isn't included.
 73 | 	params = dictToStringList(param_dict)
 74 | 	return('_'.join(params))
 75 | 
 76 | def paramDictToLabelStr(param_dict):
 77 | 	# Creates a string that can be used as an fname, separated by
 78 | 	# ', '. If a param has the value None, it isn't included.
 79 | 	params = dictToStringList(param_dict)
 80 | 	return(', '.join(params))
 81 | 
 82 | 
 83 | def listToFname(list):
 84 | 	return('_'.join(list))
 85 | 
 86 | 
 87 | def parseSingleAndListParams(param_dict, exclude_list):
 88 | 
 89 | 	# This is useful for if you want to do multiple runs, varying one or
 90 | 	# several parameters at once. exclude_list are ones you don't want to
 91 | 	# include in the parameters in the tuple.
 92 | 
 93 | 	# It returns a list of the parameters that are varied,
 94 | 	# and a list of dictionaries that can be directly passed to a function, where
 95 | 	# each one has a different set of the varied params.
 96 | 	#
 97 | 	# You should pass the args where if you don't want to vary an arg, it's just normal
 98 | 	# my_arg = 5, but if you do want to vary it, you pass it a list of the vary values, like
 99 | 	# my_arg = [1, 5, 8]. If you want to vary two at the same time, you pass them both as separate
100 | 	# lists, and it will match them up, but they need to be the same size.
101 | 
102 | 	# list_params is just a list of the params that were passed as a list, that we'll vary.
103 | 	list_params = []
104 | 	# single_params is a dict of the params that aren't varied and will have the same vals in each
105 | 	# separate run.
106 | 	single_params = {}
107 | 	# ziplist is a list of the lists for the params that are varied. So if there are two varied
108 | 	# args, each length 3, it will take these, and then below zip them to create a list of pairs.
109 | 	# arg1=[1,2,3], arg2=[2,4,8] -> ziplist=[arg1,arg2] -> param_tups=[(1,2),(2,4),(3,8)]
110 | 	ziplist = []
111 | 
112 | 
113 | 	for k,v in param_dict.items():
114 | 		if type(v).__name__ == 'list':
115 | 			list_params.append(k)
116 | 			ziplist.append(v)
117 | 		else:
118 | 			if k not in exclude_list:
119 | 				single_params[k] = v
120 | 
121 | 	param_tups = list(zip(*ziplist))
122 | 
123 | 	vary_param_dicts = []
124 | 	vary_param_tups = []
125 | 	for tup in param_tups:
126 | 		temp_dict = dict(zip(list_params,tup))
127 | 		temp_kw = {**single_params, **temp_dict}
128 | 		vary_param_tups.append(temp_dict)
129 | 		vary_param_dicts.append(temp_kw)
130 | 
131 | 	# list_params: just a list of the names of the varied ones.
132 | 	# vary_param_dicts: a list of the dicts that you can pass to each iteration, which includes the args that don't vary.
133 | 	# vary_param_tups: a list of dicts corresponding to vary_param_dicts, of only the values that change.
134 | 	return(list_params, vary_param_dicts, vary_param_tups)
135 | 
136 | 
137 | 
138 | def strfdelta(tdelta, fmt):
139 | 	d = {"days": tdelta.days}
140 | 	d["hours"], rem = divmod(tdelta.seconds, 3600)
141 | 	d["minutes"], d["seconds"] = divmod(rem, 60)
142 | 	return fmt.format(**d)
143 | 
144 | 
145 | def getCurTimeObj():
146 | 	return(datetime.now())
147 | 
148 | 
149 | def getTimeDiffNum(start_time_obj):
150 | 
151 | 	diff = datetime.timestamp(datetime.now()) - datetime.timestamp(start_time_obj)
152 | 	return(diff)
153 | 
154 | 
155 | def getTimeDiffObj(start_time_obj):
156 | 	#Gets the time diff in a nice format from the start_time_obj.
157 | 	diff = datetime.now() - start_time_obj
158 | 	return(diff)
159 | 
160 | 
161 | def getTimeDiffStr(start_time_obj):
162 | 	#Gets the time diff in a nice format from the start_time_obj.
163 | 	diff = getTimeDiffObj(start_time_obj)
164 | 
165 | 	return(strfdelta(diff,'{hours} hrs, {minutes} mins, {seconds} s'))
166 | 
167 | 
168 | def writeDictToFile(dict, fname):
169 | 	# You have to copy it here, otherwise it'll actually overwrite the values in the dict
170 | 	# you passed.
171 | 	my_dict = copy(dict)
172 | 	f = open(fname,'w+')
173 | 	for k,v in my_dict.items():
174 | 		if type(v).__name__ == 'float':
175 | 			if abs(v)>10**-4:
176 | 				my_dict[k] = '{:.5f}'.format(v)
177 | 			else:
178 | 				my_dict[k] = '{:.2E}'.format(v)
179 | 		f.write('{} = {}\n'.format(k, my_dict[k]))
180 | 
181 | 	f.close()
182 | 
183 | 
184 | def readFileToDict(fname):
185 | 	d = {}
186 | 	with open(fname) as f:
187 | 		for line in f:
188 | 			(key, val) = line.split(' = ')
189 | 			val = val.strip('\n')
190 | 			#This is to handle the fact that everything gets read in
191 | 			#as a string, but some stuff you probably want to be floats.
192 | 			try:
193 | 				val = float(val)
194 | 			except:
195 | 				val = str(val)
196 | 
197 | 			d[key] = val
198 | 
199 | 
200 | 	return(d)
201 | 
202 | 
203 | def dirFromFullPath(fname):
204 | 	# This gives you the path, stripping the local filename, if you pass it
205 | 	# a long path + filename.
206 | 	parts = fname.split('/')
207 | 	last_part = parts[-1]
208 | 	path = fname.replace(last_part,'')
209 | 	if path == '':
210 | 		return('./')
211 | 	else:
212 | 		return(path)
213 | 
214 | 
215 | def fnameFromFullPath(fname):
216 | 	# This just gets the local filename if you passed it some huge long name with the path.
217 | 	parts = fname.split('/')
218 | 	last_part = parts[-1]
219 | 	return(last_part)
220 | 
221 | def stripAnyTrailingSlash(path):
222 | 	if path[-1] == '/':
223 | 		return(path[:-1])
224 | 	else:
225 | 		return(path)
226 | 
227 | 
228 | def addTrailingSlashIfNeeded(path):
229 | 	if path[-1] == '/':
230 | 		return(path)
231 | 	else:
232 | 		return(path + '/')
233 | 
234 | 
235 | 
236 | 
237 | 
238 | def gifFromImages(imgs_path, gif_name, ext = '.png', delay=50):
239 | 
240 | 
241 | 	imgs_path = stripAnyTrailingSlash(imgs_path)
242 | 	file_list = glob.glob(imgs_path + '/' + '*' + ext) # Get all the pngs in the current directory
243 | 	#print(file_list)
244 | 	#print([fnameFromFullPath(x).split('.png')[0] for x in file_list])
245 | 	#list.sort(file_list, key=lambda x: int(x.split('_')[1].split('.png')[0]))
246 | 	list.sort(file_list, key=lambda x: int(fnameFromFullPath(x).split(ext)[0]))
247 | 	#list.sort(file_list) # Sort the images by #, this may need to be tweaked for your use case
248 | 	#print(file_list)
249 | 	assert len(file_list) < 300, 'Too many files ({}), will probably crash convert command.'.format(len(file_list))
250 | 
251 | 	output_fname = '{}/{}.gif'.format(imgs_path, gif_name)
252 | 
253 | 	check_call_arglist = ['convert'] + ['-delay', str(delay)] + file_list + [output_fname]
254 | 	#print(check_call_arglist)
255 | 	print('Calling convert command to create gif...')
256 | 	subprocess.check_call(check_call_arglist)
257 | 	print('done.')
258 | 	return(output_fname)
259 | 	# older method:
260 | 
261 | 	'''with open('image_list.txt', 'w') as file:
262 | 	    for item in file_list:
263 | 	        file.write("%s\n" % item)
264 | 
265 | 	os.system('convert @image_list.txt {}/{}.gif'.format(imgs_path,gif_name)) # On windows convert is 'magick'
266 | 	'''
267 | 
268 | #
269 | 


--------------------------------------------------------------------------------
/scrap_old_test/agent1.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import numpy as np
  3 | from math import sqrt
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | import torch.optim as optim
  8 | from torch.distributions import Categorical
  9 | from copy import deepcopy
 10 | 
 11 | 
 12 | class DQN(nn.Module):
 13 | 
 14 | 	def __init__(self, D_in, H, D_out, NL_fn=torch.tanh, softmax=False):
 15 | 		super(DQN, self).__init__()
 16 | 
 17 | 		self.lin1 = nn.Linear(D_in,H)
 18 | 		self.lin2 = nn.Linear(H,D_out)
 19 | 		self.NL_fn = NL_fn
 20 | 		self.softmax = softmax
 21 | 
 22 | 	def forward(self, x):
 23 | 		x = self.lin1(x)
 24 | 		#x = F.relu(x)
 25 | 		#x = torch.tanh(x)
 26 | 		x = self.NL_fn(x)
 27 | 		x = self.lin2(x)
 28 | 		if self.softmax:
 29 | 			x = torch.softmax(x,dim=1)
 30 | 		return(x)
 31 | 
 32 | 
 33 | 
 34 | class agent1:
 35 | 
 36 | 
 37 | 	def __init__(self, **kwargs):
 38 | 
 39 | 		self.xlims = kwargs.get('xlims', np.array([-0.5,0.5]))
 40 | 		self.ylims = kwargs.get('ylims', np.array([-0.5,0.5]))
 41 | 		self.lims = np.array((self.xlims,self.ylims))
 42 | 		self.max_dist = sqrt(np.ptp(self.xlims)**2 + np.ptp(self.ylims)**2)
 43 | 		self.a = kwargs.get('a',1.0)
 44 | 		self.drag = kwargs.get('drag', 0.5)
 45 | 		self.time_step = kwargs.get('dt',10**-1)
 46 | 		self.reward_type = kwargs.get('reward','sparse')
 47 | 
 48 | 		self.passed_params = {}
 49 | 		check_params = ['a', 'drag', 'dt', 'reward']
 50 | 		for param in check_params:
 51 | 			if kwargs.get(param, None) is not None:
 52 | 				self.passed_params[param] = kwargs.get(param, None)
 53 | 
 54 | 		self.N_actions = 4
 55 | 
 56 | 
 57 | 		self.circ_rad = np.ptp(self.xlims)/20.0
 58 | 		self.target_rad = 1*self.circ_rad
 59 | 		self.resetTarget()
 60 | 
 61 | 		self.pos0 = np.array([self.xlims.mean()/2.0,self.ylims.mean()/2.0])
 62 | 		self.v0 = np.array([0.0,0.0])
 63 | 		self.resetStateValues()
 64 | 		self.accel_array = np.array([[0,1],[0,-1],[-1,0],[1,0]])
 65 | 
 66 | 
 67 | 
 68 | 		self.N_state_terms = len(self.getStateVec())
 69 | 
 70 | 		self.HLN = 20
 71 | 
 72 | 		self.dtype = torch.float32
 73 | 		torch.set_default_dtype(self.dtype)
 74 | 
 75 | 		# I think it's already randomly initializing the weights with a gaussian mean=0, std=1
 76 | 		self.policy_NN = DQN(self.N_state_terms, self.HLN, self.N_actions, softmax=True)
 77 | 
 78 | 		'''for p in self.policy_NN.parameters():
 79 | 			print(p.data)'''
 80 | 		self.N_weight_tensors = len(list(self.policy_NN.parameters()))
 81 | 
 82 | 
 83 | 		self.N_mate_swaps = 18
 84 | 		self.N_mutations = 2
 85 | 
 86 | 
 87 | 
 88 | 	def puckTargetDist(self):
 89 | 		return(sqrt(np.sum((self.pos-self.target)**2)))
 90 | 
 91 | 
 92 | 	def addToHist(self):
 93 | 		self.pos_hist = np.concatenate((self.pos_hist,[self.pos]))
 94 | 		self.v_hist = np.concatenate((self.v_hist,[self.v]))
 95 | 		self.t.append(self.t[-1] + self.time_step)
 96 | 		self.r_hist.append(self.reward())
 97 | 
 98 | 
 99 | 	def resetTarget(self):
100 | 
101 | 		self.target = self.target_rad + self.lims[:,0] + np.random.random((2,))*(np.ptp(self.lims,axis=1)-2*self.target_rad)
102 | 
103 | 
104 | 	def iterateEuler(self,action):
105 | 
106 | 		#this uses the Euler-Cromer method to move.
107 | 
108 | 		#Right now I'm just gonna make it sit against a wall if it goes to the
109 | 		#boundary, but it might be cool to make periodic bry conds, to see if it would
110 | 		#learn to zoom around it.
111 | 
112 | 		a = self.actionToAccel(action) - self.drag*self.v
113 | 
114 | 		v_next = self.v + a*self.time_step
115 | 		pos_next = self.pos + v_next*self.time_step
116 | 
117 | 		#To handle the walls
118 | 		for i in [0,1]:
119 | 			if pos_next[i] < (self.lims[i,0] + self.circ_rad):
120 | 				pos_next[i] = self.lims[i,0] + self.circ_rad
121 | 				# This makes it "bounce" off the wall, so it keeps momentum.
122 | 				v_next[i] = -v_next[i]
123 | 				# This makes it "stick" to the wall.
124 | 				#v_next[i] = 0
125 | 
126 | 			if pos_next[i] > (self.lims[i,1] - self.circ_rad):
127 | 				pos_next[i] = self.lims[i,1] - self.circ_rad
128 | 				v_next[i] = -v_next[i]
129 | 				#v_next[i] = 0
130 | 
131 | 		self.pos = pos_next
132 | 		self.v = v_next
133 | 		self.addToHist()
134 | 
135 | 
136 | 	def actionToAccel(self,action):
137 | 		self.a_hist.append(action)
138 | 		return(self.a*self.accel_array[action])
139 | 
140 | 
141 | 	def softmaxAction(self, state_vec):
142 | 		pi_vals = self.policy_NN(state_vec)
143 | 		m = Categorical(pi_vals)
144 | 		return(m.sample())
145 | 
146 | 	###################### Required agent functions
147 | 
148 | 
149 | 	def mate(self, other_agent):
150 | 
151 | 		ag1 = deepcopy(self)
152 | 		ag2 = deepcopy(other_agent)
153 | 
154 | 		lin1_weight_shape = ag1.policy_NN.lin1.weight.data.shape
155 | 		lin1_bias_shape = ag1.policy_NN.lin1.bias.data.shape
156 | 		lin2_weight_shape = ag1.policy_NN.lin2.weight.data.shape
157 | 		lin2_bias_shape = ag1.policy_NN.lin2.bias.data.shape
158 | 
159 | 
160 | 		for i in range(self.N_mate_swaps):
161 | 
162 | 			r1 = np.random.randint(0, lin1_weight_shape[0])
163 | 			r2 = np.random.randint(0, lin1_weight_shape[1])
164 | 			ag1.policy_NN.lin1.weight.data[r1,r2], ag2.policy_NN.lin1.weight.data[r1,r2] = ag2.policy_NN.lin1.weight.data[r1,r2], ag1.policy_NN.lin1.weight.data[r1,r2]
165 | 
166 | 			r1 = np.random.randint(0, lin2_weight_shape[0])
167 | 			r2 = np.random.randint(0, lin2_weight_shape[1])
168 | 			ag1.policy_NN.lin2.weight.data[r1,r2], ag2.policy_NN.lin2.weight.data[r1,r2] = ag2.policy_NN.lin2.weight.data[r1,r2], ag1.policy_NN.lin2.weight.data[r1,r2]
169 | 
170 | 			r1 = np.random.randint(0, lin1_weight_shape[0])
171 | 			ag1.policy_NN.lin1.bias.data[r1], ag2.policy_NN.lin1.bias.data[r1] = ag2.policy_NN.lin1.bias.data[r1], ag1.policy_NN.lin1.bias.data[r1]
172 | 
173 | 			r1 = np.random.randint(0, lin2_weight_shape[0])
174 | 			ag1.policy_NN.lin2.bias.data[r1], ag2.policy_NN.lin2.bias.data[r1] = ag2.policy_NN.lin2.bias.data[r1], ag1.policy_NN.lin2.bias.data[r1]
175 | 
176 | 		return(ag1, ag2)
177 | 
178 | 
179 | 	def isSameState(self, other_agent):
180 | 		return(False)
181 | 
182 | 
183 | 	def mutate(self):
184 | 
185 | 		lin1_weight_shape = self.policy_NN.lin1.weight.data.shape
186 | 		lin1_bias_shape = self.policy_NN.lin1.bias.data.shape
187 | 		lin2_weight_shape = self.policy_NN.lin2.weight.data.shape
188 | 		lin2_bias_shape = self.policy_NN.lin2.bias.data.shape
189 | 
190 | 		for i in range(self.N_mutations):
191 | 
192 | 			r1 = np.random.randint(0, lin1_weight_shape[0])
193 | 			r2 = np.random.randint(0, lin1_weight_shape[1])
194 | 			self.policy_NN.lin1.weight.data[r1,r2] = np.random.randn()
195 | 
196 | 			r1 = np.random.randint(0, lin2_weight_shape[0])
197 | 			r2 = np.random.randint(0, lin2_weight_shape[1])
198 | 			self.policy_NN.lin2.weight.data[r1,r2] = np.random.randn()
199 | 
200 | 			r1 = np.random.randint(0, lin1_weight_shape[0])
201 | 			self.policy_NN.lin1.bias.data[r1] = np.random.randn()
202 | 
203 | 			r1 = np.random.randint(0, lin2_weight_shape[0])
204 | 			self.policy_NN.lin2.bias.data[r1] = np.random.randn()
205 | 
206 | 
207 | 
208 | 	def fitnessFunction(self):
209 | 		self.fixedLengthEpisode(100)
210 | 		# I think the fitness function is meant to be minimized, so we should pass it
211 | 		# the negative of the total reward.
212 | 		return(-sum(self.r_hist))
213 | 
214 | 
215 | 
216 | 	def fixedLengthEpisode(self, N_steps):
217 | 		self.resetTarget()
218 | 		self.resetStateValues()
219 | 
220 | 		for i in range(N_steps):
221 | 		    s = torch.tensor(self.getStateVec(), dtype=torch.float32).unsqueeze(dim=0)
222 | 		    a = self.softmaxAction(s)
223 | 		    r, s_next = self.iterate(a)
224 | 
225 | 
226 | 	def getPassedParams(self):
227 | 		#This returns a dict of params that were passed to the agent, that apply to the agent.
228 | 		#So if you pass it a param for 'reward', it will return that, but it won't return the
229 | 		#default val if you didn't pass it.
230 | 		return(self.passed_params)
231 | 
232 | 
233 | 	def getStateVec(self):
234 | 		assert self.target is not None, 'Need target to get state vec'
235 | 		return(np.concatenate((self.pos,self.v,self.target)))
236 | 
237 | 
238 | 	def getState(self):
239 | 		return(self.getStateVec())
240 | 
241 | 	def printState(self):
242 | 		print(self.getState())
243 | 
244 | 	def reward(self):
245 | 
246 | 		assert self.target is not None, 'Need a target'
247 | 
248 | 		max_R = 1
249 | 
250 | 		if self.reward_type == 'sparse':
251 | 			if self.puckTargetDist() <= (self.target_rad + self.circ_rad):
252 | 				return(max_R)
253 | 			else:
254 | 				return(-0.01)
255 | 
256 | 		if self.reward_type == 'shaped':
257 | 			#return(max_R*(self.max_dist/2.0 - self.puckTargetDist()))
258 | 			#These numbers will probably have to change if a, dt, or the dimensions change.
259 | 			return(-0.5*self.puckTargetDist() + 0.4)
260 | 
261 | 
262 | 	def initEpisode(self):
263 | 		self.resetStateValues()
264 | 		self.resetTarget()
265 | 
266 | 
267 | 	def iterate(self,action):
268 | 		self.iterateEuler(action)
269 | 
270 | 		r = self.reward()
271 | 		if r > 0:
272 | 			self.resetTarget()
273 | 
274 | 		return(r,self.getStateVec())
275 | 
276 | 
277 | 	def resetStateValues(self):
278 | 
279 | 		self.pos = self.pos0
280 | 		self.v = self.v0
281 | 
282 | 		self.pos_hist = np.array([self.pos])
283 | 		self.v_hist = np.array([self.v])
284 | 		self.t = [0]
285 | 		self.a_hist = [0]
286 | 		self.r_hist = []
287 | 
288 | 
289 | 	def drawState(self, ax):
290 | 
291 | 		ax.clear()
292 | 		ax.set_xlim(tuple(self.xlims))
293 | 		ax.set_ylim(tuple(self.ylims))
294 | 
295 | 		ax.set_xlabel('x')
296 | 		ax.set_ylabel('y')
297 | 		ax.set_aspect('equal')
298 | 
299 | 		puck = plt.Circle(tuple(self.pos), self.circ_rad, color='tomato')
300 | 		ax.add_artist(puck)
301 | 
302 | 		if self.target is not None:
303 | 			target = plt.Circle(tuple(self.target), self.target_rad, color='seagreen')
304 | 			ax.add_artist(target)
305 | 
306 | 
307 | 	def plotStateParams(self,axes):
308 | 
309 | 		ax1 = axes[0]
310 | 		ax2 = axes[1]
311 | 		ax3 = axes[2]
312 | 		ax4 = axes[3]
313 | 
314 | 		ax1.clear()
315 | 		ax1.plot(self.pos_hist[:,0][-1000:],label='x')
316 | 		ax1.plot(self.pos_hist[:,1][-1000:],label='y')
317 | 		ax1.legend()
318 | 
319 | 		ax2.clear()
320 | 		ax2.plot(self.a_hist[-1000:],label='a')
321 | 		ax2.set_yticks([0,1,2,3])
322 | 		ax2.set_yticklabels(['U','D','L','R'])
323 | 		ax2.legend()
324 | 
325 | 
326 | 		ax3.clear()
327 | 		ax3.plot(self.r_hist[-1000:],label='R')
328 | 		ax3.legend()
329 | 
330 | 
331 | 		ax4.clear()
332 | 		ax4.plot(self.v_hist[:,0][-1000:],label='vx')
333 | 		ax4.plot(self.v_hist[:,1][-1000:],label='vy')
334 | 		ax4.legend()
335 | 
336 | 
337 | 
338 | 
339 | #
340 | 


--------------------------------------------------------------------------------
/classes/PopTests.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('./classes')
  3 | import Population
  4 | import matplotlib.pyplot as plt
  5 | from statistics import mean,stdev
  6 | import FileSystemTools as fst
  7 | from time import time
  8 | import numpy as np
  9 | import os
 10 | import glob
 11 | from math import sqrt, ceil
 12 | import subprocess
 13 | 
 14 | 
 15 | 
 16 | def varyParam(**kwargs):
 17 | 
 18 |     st = fst.getCurTimeObj()
 19 | 
 20 |     date_time = fst.getDateString()
 21 |     notes = kwargs.get('notes', '')
 22 |     N_runs = kwargs.get('N_runs', 1)
 23 |     show_plot = kwargs.get('show_plot', False)
 24 | 
 25 |     exclude_list = ['notes', 'N_runs', 'show_plot']
 26 |     vary_params, vary_param_dict_list, vary_param_tups = fst.parseSingleAndListParams(kwargs,exclude_list)
 27 | 
 28 |     label = 'vary_' + fst.listToFname(vary_params) + '_' + notes
 29 |     dir = fst.makeLabelDateDir(label)
 30 |     print('Saving vary param results to: ', dir)
 31 |     img_ext = '.png'
 32 |     base_fname = fst.combineDirAndFile(dir, label + date_time)
 33 |     img_fname = base_fname + img_ext
 34 | 
 35 |     log_fname = base_fname + '_log.txt'
 36 |     fst.writeDictToFile(kwargs, log_fname)
 37 | 
 38 |     # Set the SD for each entry to 0. If there's only 1 run each, that's fine. If
 39 |     # there are several, it will replace the 0's.
 40 |     R_tots = []
 41 |     SD = [0]*len(vary_param_dict_list)
 42 | 
 43 |     for i, kws in enumerate(vary_param_dict_list):
 44 | 
 45 |         print('\n{}\n'.format(vary_param_tups[i]))
 46 |         results = []
 47 |         for j in range(N_runs):
 48 |             print('run ',j)
 49 | 
 50 |             p1 = Population.Population(**kws, dir=dir, fname_notes=fst.paramDictToFnameStr(vary_param_tups[i]))
 51 | 
 52 |             _, _, r_tot = p1.evolve(**kws)
 53 | 
 54 |             results.append(r_tot)
 55 | 
 56 |         R_tots.append(mean(results))
 57 |         if N_runs>1:
 58 |             SD[i] = stdev(results)
 59 | 
 60 | 
 61 |     plt.close('all')
 62 |     fig,axes = plt.subplots(1,1,figsize=(6,9))
 63 | 
 64 |     plt.errorbar(list(range(len(R_tots))), R_tots, yerr=SD, fmt='ro-')
 65 | 
 66 |     axes.set_xticks(list(range(len(R_tots))))
 67 |     x_tick_labels = ['\n'.join(fst.dictToStringList(param)) for param in vary_param_tups]
 68 |     axes.set_xticklabels(x_tick_labels, rotation='vertical')
 69 |     axes.set_ylabel('Total reward')
 70 |     plt.tight_layout()
 71 |     plt.savefig(img_fname)
 72 | 
 73 |     vary_param_labels = [','.join(fst.dictToStringList(param)) for param in vary_param_tups]
 74 |     f = open(base_fname + '_values.txt','w+')
 75 |     for label, val, sd in zip(vary_param_labels, R_tots, SD):
 76 |         f.write('{}\t{}\t{}\n'.format(label, val, sd))
 77 |     f.close()
 78 | 
 79 |     print('\n\ntook {} to execute'.format(fst.getTimeDiffStr(st)))
 80 | 
 81 |     plotRewardCurvesByVaryParam(dir, searchlabel='bestscore')
 82 |     plotRewardCurvesByVaryParam(dir, searchlabel='meanscore')
 83 | 
 84 |     if show_plot:
 85 |         plt.show()
 86 | 
 87 | 
 88 | 
 89 | 
 90 | def plotRewardCurvesByVaryParam(dir, searchlabel, **kwargs):
 91 | 
 92 |     #Use the "values" file from now on to get the vary_param values
 93 |     # searchlabel will be the thing the fname we're searching for is prefaced with,
 94 |     # so we can do the same for multiple things.
 95 | 
 96 |     # Find the values file
 97 |     val_file_list = glob.glob(fst.addTrailingSlashIfNeeded(dir) + 'vary_' + '*' + 'values.txt')
 98 | 
 99 |     assert len(val_file_list)==1, 'there needs to be exactly one values.txt file.'
100 | 
101 |     vals_file = val_file_list[0]
102 | 
103 |     # Read in each line, corresponding to each vary params tuple
104 |     with open(vals_file, 'r') as f:
105 |         vary_param_vals = f.read().split('\n')
106 | 
107 |     # they're tab sep'd, so split and grab the first of each col.
108 |     vary_param_vals = [x.split('\t')[0] for x in vary_param_vals if x!='']
109 |     # Expects the vary params to be separated by underscores -- not ideal.
110 |     vary_param_vals = [x.replace(',','_') for x in vary_param_vals]
111 |     # Get the files that contain this series of vary vals...
112 |     vary_param_files = [glob.glob(fst.addTrailingSlashIfNeeded(dir) + searchlabel + '*' + val + '*' + '.txt') for val in vary_param_vals]
113 | 
114 | 
115 | 
116 |     fig, ax = plt.subplots(1, 1, figsize=(10,8))
117 | 
118 |     line_cols = ['darkred', 'mediumblue', 'darkgreen', 'goldenrod', 'purple', 'darkorange', 'black']
119 |     shade_cols = ['tomato', 'dodgerblue', 'lightgreen', 'khaki', 'plum', 'peachpuff', 'lightgray']
120 |     max_total = -1000
121 |     min_total = 1000
122 |     N_stds = 2
123 |     N_skip = 0
124 | 
125 |     # This is a really hacky way of lining up curves that are shifted. You pass it a list of
126 |     # how each curve (the avg) will be scaled and how each will be offset. If you don't pass it
127 |     # anything, it won't do anything differently.
128 |     scale_factors = kwargs.get('scale_factors', np.ones(len(vary_param_vals)))
129 |     offsets = kwargs.get('offsets', np.zeros(len(vary_param_vals)))
130 | 
131 |     # For doing the scale and offset thing, and making sure the ranges are right.
132 |     print('vary_param_vals', vary_param_vals)
133 |     for i, (val, file_group) in enumerate(zip(vary_param_vals, vary_param_files)):
134 |         dat_array = np.array([np.loadtxt(fname) for fname in file_group])
135 |         avg = np.mean(dat_array, axis=0)*scale_factors[i] + offsets[i]
136 |         std = np.std(dat_array, axis=0)*scale_factors[i]
137 |         if max((avg + N_stds*std)[N_skip:]) > max_total:
138 |             max_total = max((avg + N_stds*std)[N_skip:])
139 |         if min((avg - N_stds*std)[N_skip:]) < min_total:
140 |             min_total = min((avg - N_stds*std)[N_skip:])
141 |         plt.plot(avg, color=line_cols[i], label=val)
142 |         plt.fill_between(np.array(range(len(avg))), avg - std, avg + std, facecolor=shade_cols[i], alpha=0.5)
143 | 
144 |     #print(max_total, min_total)
145 |     plt.legend()
146 |     plt.xlabel('generations')
147 |     plt.ylabel(searchlabel)
148 |     plt.ylim((min_total,max_total))
149 | 
150 |     plt.savefig(fst.addTrailingSlashIfNeeded(dir) + 'all_' + searchlabel + '__'.join(vary_param_vals) + '__' + fst.getDateString() + '.png')
151 | 
152 |     # For each one separately
153 |     print('vary_param_vals', vary_param_vals)
154 |     for i, (val, file_group) in enumerate(zip(vary_param_vals, vary_param_files)):
155 |         #print(max_total, min_total)
156 |         plt.clf()
157 |         dat_array = np.array([np.loadtxt(fname) for fname in file_group])
158 |         avg = np.mean(dat_array, axis=0)*scale_factors[i] + offsets[i]
159 |         std = np.std(dat_array, axis=0)*scale_factors[i]
160 | 
161 |         plt.plot(avg, color=line_cols[i], label=val)
162 |         plt.legend()
163 |         plt.xlabel('generations')
164 |         plt.ylabel(searchlabel)
165 |         plt.ylim((min_total,max_total))
166 |         plt.fill_between(np.array(range(len(avg))), avg - std, avg + std, facecolor=shade_cols[i], alpha=0.5)
167 |         plt.savefig(fst.addTrailingSlashIfNeeded(dir) + searchlabel + '__' + val + '__' + fst.getDateString() + '.png')
168 | 
169 | 
170 | 
171 | def plotPopulationProperty(dir, search_label, **kwargs):
172 | 
173 | 
174 |     show_plot = kwargs.get('show_plot', False)
175 |     save_plot = kwargs.get('save_plot', True)
176 |     make_hist_gif = kwargs.get('make_hist_gif', True)
177 | 
178 |     # Find the values file
179 |     prop_file_list = glob.glob(fst.addTrailingSlashIfNeeded(dir) + search_label + '*' + '.txt')
180 | 
181 |     assert len(prop_file_list)==1, 'there needs to be exactly one .txt file.'
182 | 
183 |     prop_file = prop_file_list[0]
184 | 
185 |     prop_dat = np.loadtxt(prop_file)
186 | 
187 |     N_gen = prop_dat.shape[0]
188 |     max_gif_frames = 299
189 |     gif_save_period = ceil(N_gen/max_gif_frames)
190 | 
191 |     avg = np.mean(prop_dat)
192 |     std = np.std(prop_dat)
193 | 
194 |     print('dat min: {:.2f}'.format(np.min(prop_dat)))
195 |     print('dat max: {:.2f}'.format(np.max(prop_dat)))
196 |     print('dat mean: {:.2f}'.format(avg))
197 |     print('dat std: {:.2f}'.format(std))
198 | 
199 |     dat_lb = max(np.min(prop_dat), avg - 2*std)
200 |     dat_ub = min(np.max(prop_dat), avg + 2*std)
201 | 
202 |     dat_lims = (dat_lb - 0.1*abs(dat_lb), dat_ub + 0.1*abs(dat_ub))
203 | 
204 |     # Make histogram gif
205 | 
206 |     if make_hist_gif:
207 | 
208 |         gif_dir = fst.makeDir(fst.combineDirAndFile(dir, 'gif_imgs'))
209 | 
210 |         for i, gen_dat in enumerate(prop_dat):
211 |             plt.clf()
212 |             plt.hist(gen_dat, facecolor='dodgerblue', edgecolor='k', label=search_label, alpha=0.9, density=True)
213 |             plt.axvline(np.mean(gen_dat), color='k', linestyle='dashed', linewidth=1)
214 |             plt.xlim(dat_lims)
215 |             plt.ylim((0, 1.0/len(gen_dat)))
216 |             plt.title(f'generation {i}')
217 |             plt.xlabel(search_label)
218 |             plt.ylabel('counts')
219 | 
220 |             if save_plot:
221 |                 if i%gif_save_period == 0:
222 |                     fname = fst.combineDirAndFile(gif_dir, f'{i}.png')
223 |                     plt.savefig(fname)
224 | 
225 | 
226 |         try:
227 |             gif_name = fst.gifFromImages(gif_dir, f'{search_label}_hist', ext='.png', delay=5)
228 |             print(gif_name)
229 |             gif_basename = fst.fnameFromFullPath(gif_name)
230 |             subprocess.check_call(['mv', gif_name, fst.combineDirAndFile(dir, gif_basename)])
231 |             subprocess.check_call(['rm', '-rf', gif_dir])
232 |         except:
233 |             print('problem in creating gif')
234 | 
235 | 
236 |     plt.clf()
237 | 
238 |     # Make time scatter plot
239 | 
240 |     # Right now this is in the structure where a row is a generation and each
241 |     # entry of that row is an individ. We want to plot it per gen, so we need
242 |     # to make it a set of (generation, value) points (so 5 gens of 8 individs would
243 |     # go 5x8 -> 5x8x2 -> 40x2 -> 2x40)
244 |     prop_pts = np.array([[[i, val] for val in prop_dat[i]] for i in range(len(prop_dat))])
245 |     N_tot_entries = prop_dat.shape[0]*prop_dat.shape[1] # Makes it 5x8x2
246 |     prop_pts = np.reshape(prop_pts, (N_tot_entries, 2)) # Makes it 40x2
247 |     prop_pts = np.swapaxes(prop_pts, 0, 1) # Makes it 2x40
248 | 
249 |     plt.plot(prop_pts[0], prop_pts[1], 'o', color='dodgerblue')
250 |     plt.xlabel('generations')
251 |     plt.ylabel(search_label)
252 |     plt.ylim(dat_lims)
253 | 
254 |     if save_plot:
255 |         plt.savefig(fst.combineDirAndFile(dir, search_label + '_scatter_plot.png'))
256 | 
257 |     if show_plot:
258 |         plt.show()
259 | 
260 |     plt.clf()
261 | 
262 |     # Make time std plot
263 | 
264 |     gen_mean = np.mean(prop_dat, axis=1)
265 |     gen_std = np.std(prop_dat, axis=1)
266 | 
267 |     line_cols = ['darkred', 'mediumblue', 'darkgreen', 'goldenrod', 'purple', 'darkorange', 'black']
268 |     shade_cols = ['tomato', 'dodgerblue', 'lightgreen', 'khaki', 'plum', 'peachpuff', 'lightgray']
269 | 
270 |     plt.fill_between(np.array(range(len(gen_mean))), gen_mean - gen_std, gen_mean + gen_std, facecolor=shade_cols[0], alpha=0.5)
271 |     plt.plot(gen_mean, color=line_cols[0])
272 | 
273 |     plt.xlabel('generations')
274 |     plt.ylabel(search_label)
275 |     plt.ylim(dat_lims)
276 | 
277 |     if save_plot:
278 |         plt.savefig(fst.combineDirAndFile(dir, search_label + '_mean-std_plot.png'))
279 | 
280 |     if show_plot:
281 |         plt.show()
282 | 
283 |     plt.close()
284 | 
285 | 
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 
297 | 
298 | 
299 | 
300 | 
301 | #
302 | 


--------------------------------------------------------------------------------
/classes/Population.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('./classes')
  3 | from EPANN import EPANN
  4 | from copy import deepcopy
  5 | import matplotlib.pyplot as plt
  6 | import FileSystemTools as fst
  7 | import RunTools as rt
  8 | import numpy as np
  9 | import subprocess
 10 | from math import sqrt, floor
 11 | import movie_combine
 12 | import traceback as tb
 13 | 
 14 | class Population:
 15 | 
 16 | 
 17 |     def __init__(self, **kwargs):
 18 | 
 19 |         self.agent_class = kwargs.get('agent_class', None)
 20 |         assert self.agent_class is not None, 'Need to provide an agent class! exiting'
 21 | 
 22 |         self.init_kwargs = kwargs
 23 | 
 24 |         self.N_pop = kwargs.get('N_pop', 15)
 25 |         self.mut_type = kwargs.get('mut_type', 'change_topo')
 26 |         self.gauss_std = kwargs.get('std', 0.2)
 27 |         self.best_N_frac = kwargs.get('best_N_frac', 1/5.0)
 28 | 
 29 |         self.fname_notes = '{}_{}{}'.format(kwargs.get('fname_notes', ''), self.agent_class.__name__, kwargs.get('env_name', ''))
 30 |         self.datetime_str = fst.getDateString()
 31 |         self.base_dir = kwargs.get('base_dir', 'misc_runs')
 32 |         self.dir = fst.combineDirAndFile(self.base_dir, 'evolve_{}_{}'.format(self.datetime_str, self.fname_notes))
 33 |         fst.makeDir(self.dir)
 34 |         self.plot_dir = fst.makeDir(fst.combineDirAndFile(self.dir, 'plots'))
 35 |         print('run dir: ', self.dir)
 36 |         pop_kwargs = {'run_dir' : self.dir}
 37 |         both_kwargs = {**kwargs, **pop_kwargs}
 38 | 
 39 |         self.population = [EPANN(**both_kwargs) for i in range(self.N_pop)]
 40 | 
 41 | 
 42 | 
 43 |     def evolve(self, **kwargs):
 44 | 
 45 | 
 46 |         start_time = fst.getCurTimeObj()
 47 | 
 48 |         N_trials_per_agent = kwargs.get('N_trials_per_agent', 3)
 49 |         N_episode_steps = kwargs.get('N_episode_steps', 400)
 50 |         N_gen = kwargs.get('N_gen', 50)
 51 |         N_trials_per_agent = kwargs.get('N_trials_per_agent', 3)
 52 |         N_runs_each_champion = kwargs.get('N_runs_each_champion', 5)
 53 |         N_runs_with_best = kwargs.get('N_runs_with_best', 10)
 54 |         assert N_runs_with_best > 0, 'Need at least one run with best individ!'
 55 |         record_final_runs = kwargs.get('record_final_runs', False)
 56 |         show_final_runs = kwargs.get('show_final_runs', False)
 57 | 
 58 |         # Create a log file for the kwargs
 59 |         log_fname = fst.combineDirAndFile(self.dir, f'log_{self.datetime_str}.txt')
 60 |         fst.writeDictToFile({**self.init_kwargs, **kwargs}, log_fname)
 61 | 
 62 | 
 63 |         best_FFs = []
 64 |         mean_FFs = []
 65 | 
 66 |         all_FFs = []
 67 |         all_nodecounts = []
 68 |         all_weightcounts = []
 69 |         champion_FF_mean_std = [] # A list of pairs of [mean, std] for runs of the current champion.
 70 | 
 71 | 
 72 |         #try:
 73 |         for i in range(N_gen):
 74 | 
 75 |             best_FF = -100000000
 76 |             mean_FF = 0
 77 | 
 78 |             mean_Rs = []
 79 |             for j, individ in enumerate(self.population):
 80 |                 mean_episode_score = 0
 81 |                 for run in range(N_trials_per_agent):
 82 |                     mean_episode_score += individ.runEpisode(N_episode_steps)
 83 | 
 84 |                 mean_episode_score = mean_episode_score/N_trials_per_agent
 85 |                 mean_Rs.append([j, mean_episode_score])
 86 |                 mean_FF += mean_episode_score
 87 |                 if mean_episode_score > best_FF:
 88 |                     best_FF = mean_episode_score
 89 | 
 90 |             mean_FF = mean_FF/self.N_pop
 91 |             best_FFs.append(best_FF)
 92 |             mean_FFs.append(mean_FF)
 93 | 
 94 |             mean_Rs_no_label = [x[1] for x in mean_Rs]
 95 | 
 96 |             # Run the champion for several times to get stats
 97 |             champion_ind = self.sortByFitnessFunction(mean_Rs)[0][0]
 98 |             champion_scores = []
 99 |             for run in range(N_runs_each_champion):
100 |                 champion_scores.append(self.population[champion_ind].runEpisode(N_episode_steps))
101 |             champion_FF_mean_std.append([np.mean(champion_scores), np.std(champion_scores)])
102 | 
103 | 
104 |             # Update with progress
105 |             if i%max(1, int(N_gen/20))==0:
106 |                 print('\ngen {:.1f}. Best FF = {:.4f}, mean FF = {:.4f}'.format(i, best_FF, mean_FF))
107 |                 self.plotPopHist(mean_Rs_no_label, 'pop_FF')
108 |                 if self.mut_type == 'change_topo':
109 |                     self.plotPopHist([len(epann.node_list) for epann in self.population], 'pop_nodecount')
110 |                     self.plotPopHist([len(epann.weights_list) for epann in self.population], 'pop_weightcount')
111 |                     fname = fst.combineDirAndFile(self.plot_dir, '{}_gen{}_{}.png'.format('bestNN', i, fst.getDateString()))
112 |                     self.population[champion_ind].plotNetwork(show_plot=False, save_plot=True, fname=fname, node_legend=True)
113 | 
114 |                     #print('network sizes: ', [len(x.node_list) for x in self.population])
115 |                     print('avg network size: {:.3f}'.format(sum([len(x.node_list) for x in self.population])/self.N_pop))
116 |                     #print('# network connections: ', [len(x.weights_list) for x in self.population])
117 |                     print('avg # network connections: {:.3f}'.format(sum([len(x.weights_list) for x in self.population])/self.N_pop))
118 | 
119 |             all_FFs.append(mean_Rs_no_label)
120 |             all_nodecounts.append([len(epann.node_list) for epann in self.population])
121 |             all_weightcounts.append([len(epann.weights_list) for epann in self.population])
122 | 
123 |             # Get the next gen by mutating
124 |             self.getNextGen(mean_Rs)
125 | 
126 | 
127 |         print('\n\nRun took: ', fst.getTimeDiffStr(start_time), '\n\n')
128 | 
129 |         self.saveScore(best_FFs, 'bestscore')
130 |         self.saveScore(mean_FFs, 'meanscore')
131 |         self.saveScore(all_FFs, 'all_FFs')
132 |         self.saveScore(all_nodecounts, 'nodecounts')
133 |         self.saveScore(all_weightcounts, 'weightcounts')
134 |         self.saveScore(champion_FF_mean_std, 'champion_FF_mean_std')
135 | 
136 |         # Plot best and mean FF curves for the population
137 |         plt.subplots(1, 1, figsize=(8,8))
138 |         plt.plot(mean_FFs, color='dodgerblue', label='Pop. avg FF')
139 |         plt.plot(best_FFs, color='tomato', label='Pop. best FF')
140 |         plt.xlabel('generations')
141 |         plt.ylabel('FF')
142 |         plt.legend()
143 |         fname = fst.combineDirAndFile(self.dir, '{}_{}.png'.format('FFplot', self.datetime_str))
144 |         plt.savefig(fname)
145 | 
146 |         plt.close()
147 | 
148 |         # Plot the mean and std for the champion of each generation
149 |         champion_FF_mean_std = np.array(champion_FF_mean_std)
150 |         champ_mean = champion_FF_mean_std[:,0]
151 |         champ_std = champion_FF_mean_std[:,1]
152 |         plt.fill_between(
153 |         np.array(range(len(champ_mean))),
154 |         champ_mean - champ_std,
155 |         champ_mean + champ_std,
156 |         facecolor='dodgerblue', alpha=0.5)
157 | 
158 |         plt.plot(champ_mean, color='mediumblue')
159 |         plt.xlabel('generations')
160 |         plt.ylabel('FF')
161 |         fname = fst.combineDirAndFile(self.dir, '{}_{}.png'.format('champion_mean-std_plot', self.datetime_str))
162 |         plt.savefig(fname)
163 | 
164 |         # Get an avg final score for the best individ. You know this will be the best one because
165 |         # the best one is preserved after getNextGen().
166 |         best_individ = self.population[0]
167 | 
168 |         # Save the NN of the best individ.
169 |         bestNN_fname = fst.combineDirAndFile(self.dir, f'bestNN_{self.agent_class.__name__}_{self.datetime_str}')
170 |         best_individ.saveNetworkToFile(fname=(bestNN_fname + '.json'))
171 |         best_individ.plotNetwork(show_plot=False, save_plot=True, fname=(bestNN_fname + '.png'), node_legend=True)
172 | 
173 |         # Something annoying happening with showing vs recording the final runs, but I'll figure it out later.
174 |         best_individ_scores = [best_individ.runEpisode(N_episode_steps,
175 |         show_episode=show_final_runs,
176 |         record_episode=record_final_runs,
177 |         **kwargs) for i in range(N_runs_with_best)]
178 |         #best_individ.agent.closeEnv()
179 |         best_individ_avg_score = np.mean(best_individ_scores)
180 | 
181 |         # Plot some more stuff with the saved dat
182 |         try:
183 |             rt.plotPopulationProperty(self.dir, 'all_FFs', make_hist_gif=False)
184 |             rt.plotPopulationProperty(self.dir, 'weightcounts', make_hist_gif=False)
185 |         except:
186 |             print('\n\n')
187 |             print(tb.format_exc())
188 |             print('plotPopulationProperty() failed, continuing')
189 | 
190 | 
191 | 
192 |         try:
193 |             if record_final_runs:
194 |                 N_side = min(3, floor(sqrt(N_runs_with_best)))
195 |                 movie_dir = best_individ.agent.record_dir
196 |                 movie_combine.combineMovieFiles(path=movie_dir, grid_size=f'{N_side}x{N_side}', make_gif=True)
197 |         except:
198 |             print('\n\n')
199 |             print(tb.format_exc())
200 |             print('failed combining movies into single panel')
201 | 
202 |         return_dict = {}
203 |         return_dict['best_FFs'] = best_FFs
204 |         return_dict['mean_FFs'] = mean_FFs
205 |         return_dict['best_individ_avg_score'] = best_individ_avg_score
206 | 
207 |         return(return_dict)
208 | 
209 | 
210 |     def getNextGen(self, FF_list):
211 | 
212 |         '''
213 |         This first sorts the pop by the (index, FF) list passed to it.
214 |         Then it takes the best_N of these indices in order. It starts the
215 |         new_pop with a clones of the best individ from the last gen. Then it adds
216 |         to new_pop by mutating the best_N until the pop is filled again.
217 | 
218 |         So, you can assume that for the new pop., pop[0] is the best one of the
219 |         LAST generation.
220 | 
221 |         '''
222 | 
223 |         pop_indices_sorted = self.sortByFitnessFunction(FF_list)
224 |         best_N = max(int(self.N_pop*self.best_N_frac), 2)
225 |         #best_N = 1
226 |         best_N_indices = [x[0] for x in pop_indices_sorted[:best_N]]
227 | 
228 |         new_pop = [self.population[best_N_indices[0]].clone()]
229 |         mod_counter = 0
230 | 
231 |         while len(new_pop)<self.N_pop:
232 | 
233 |             new_EPANN = self.population[best_N_indices[mod_counter%best_N]].clone()
234 |             if self.mut_type == 'change_topo':
235 |                 new_EPANN.mutate(std=self.gauss_std)
236 |             if self.mut_type == 'gauss_noise':
237 |                 new_EPANN.gaussMutate(std=self.gauss_std)
238 | 
239 |             new_pop.append(new_EPANN)
240 |             mod_counter += 1
241 | 
242 |         self.population = new_pop
243 | 
244 | 
245 |     def saveScore(self, score, label):
246 |         fname = fst.combineDirAndFile(self.dir, '{}_{}.txt'.format(label, self.datetime_str))
247 |         np.savetxt(fname, score, fmt='%.4f')
248 | 
249 | 
250 |     def sortByFitnessFunction(self, FF_list):
251 |         # Should be a list of tuples of (population index, FF)
252 |         # Sorts from best to worst FF.
253 |         pop_indices_sorted = sorted(FF_list, key=lambda x: -x[1])
254 |         return(pop_indices_sorted)
255 | 
256 | 
257 |     def plotPopHist(self, hist_var, label):
258 |         fig, ax = plt.subplots(1, 1, figsize=(8,8))
259 |         ax.hist(hist_var, facecolor='dodgerblue', edgecolor='k', label=label)
260 |         plt.axvline(np.mean(hist_var), color='k', linestyle='dashed', linewidth=1)
261 |         plt.xlabel(label)
262 |         ax.legend()
263 |         fname = fst.combineDirAndFile(self.plot_dir, '{}_{}.png'.format(label, fst.getDateString()))
264 |         plt.savefig(fname)
265 | 
266 |         # Is this the way to make sure too many figures don't get created? or plt.close()?
267 |         plt.close()
268 | 
269 | 
270 | 
271 | 
272 | 
273 | #
274 | 


--------------------------------------------------------------------------------
/classes/EPANN.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('./classes')
  3 | import numpy as np
  4 | from queue import Queue
  5 | from Node import Node
  6 | from copy import deepcopy
  7 | import matplotlib.pyplot as plt
  8 | import time
  9 | import random
 10 | import networkx as nx
 11 | import pygraphviz as pgv
 12 | import FileSystemTools as fst
 13 | import json
 14 | 
 15 | class EPANN:
 16 | 
 17 |     def __init__(self, agent_class, **kwargs):
 18 | 
 19 |         self.agent = agent_class(**kwargs)
 20 |         self.agent_class = agent_class
 21 | 
 22 |         self.render_type = kwargs.get('render_type', 'gym')
 23 |         self.verbose = kwargs.get('verbose', False)
 24 |         self.action_space_type = self.agent.action_space_type
 25 |         self.N_init_hidden_nodes = kwargs.get('N_init_hidden_nodes', 0)
 26 |         self.init_IO_weights = kwargs.get('init_IO_weights', False)
 27 |         self.N_inputs = self.agent.N_state_terms
 28 |         self.N_action_outputs = self.agent.N_actions
 29 |         self.N_total_outputs = self.N_action_outputs # + 3
 30 |         self.epsilon = 0.0
 31 | 
 32 |         self.input_node_indices = []
 33 |         self.output_node_indices = []
 34 | 
 35 |         self.node_list = []
 36 |         self.weights_list = set() # This will be a set of tuples of the form (parent index, child index)
 37 |         self.weights_dict = {}
 38 |         self.propagate_order = []
 39 | 
 40 |         self.weight_change_chance = kwargs.get('weight_change_chance', 0.98)
 41 |         self.weight_add_chance = kwargs.get('weight_add_chance', 0.09)
 42 |         self.weight_remove_chance = kwargs.get('weight_remove_chance', 0.05)
 43 |         self.node_add_chance = kwargs.get('node_add_chance', 0.0005)
 44 | 
 45 | 
 46 | 
 47 |         # Add bias node
 48 |         self.addNode(is_bias_node=True)
 49 | 
 50 |         # Add input and output nodes
 51 |         [self.addNode(is_input_node=True) for i in range(self.N_inputs)]
 52 |         [self.addNode(is_output_node=True) for i in range(self.N_total_outputs)]
 53 |         #print('input indices:', self.input_node_indices)
 54 |         #print('output indices:', self.output_node_indices)
 55 |         #self.plotNetwork()
 56 | 
 57 |         # Add connections from all inputs to all outputs (maybe disable later?)
 58 |         if self.init_IO_weights:
 59 |             [[self.addConnectingWeight((i, o), std=1.0) for o in self.output_node_indices] for i in self.input_node_indices]
 60 | 
 61 |         self.sortPropagateOrder()
 62 | 
 63 | 
 64 |     def saveNetworkToFile(self, **kwargs):
 65 | 
 66 |         default_fname = 'misc_runs/{}_NN_{}.json'.format(self.agent_class.__name__, fst.getDateString())
 67 | 
 68 |         fname = kwargs.get('fname', default_fname)
 69 | 
 70 |         # For saving the NN to file in a way that it can be read in again.
 71 |         NN_dict = {}
 72 |         NN_dict['N_nodes'] = len(self.node_list)
 73 |         NN_dict['N_input_nodes'] = len(self.input_node_indices)
 74 |         NN_dict['N_output_nodes'] = len(self.output_node_indices)
 75 |         NN_dict['input_nodes'] = self.input_node_indices
 76 |         NN_dict['output_nodes'] = self.output_node_indices
 77 |         NN_dict['bias_node'] = self.bias_node_index
 78 | 
 79 |         NN_dict['weights'] = []
 80 | 
 81 |         for par_index, child_index in list(self.weights_list):
 82 |             NN_dict['weights'].append({
 83 |             'parent' : par_index,
 84 |             'child' : child_index,
 85 |             'weight' : self.node_list[par_index].output_weights[child_index]
 86 |             })
 87 | 
 88 | 
 89 | 
 90 |         with open(fname, 'w') as outfile:
 91 |             json.dump(NN_dict, outfile, indent=4)
 92 | 
 93 | 
 94 |     def loadNetworkFromFile(self, fname):
 95 | 
 96 |         # This loads a NN from a .json file that was saved with
 97 |         # saveNetworkToFile(). Note that it will overwrite any existing NN
 98 |         # for this object.
 99 | 
100 |         with open(fname) as json_file:
101 |             NN_dict = json.load(json_file)
102 | 
103 |         self.node_list = []
104 |         self.input_node_indices = []
105 |         self.output_node_indices = []
106 | 
107 |         self.N_inputs = NN_dict['N_input_nodes']
108 |         self.N_total_outputs = NN_dict['N_output_nodes']
109 | 
110 |         N_other_nodes = NN_dict['N_nodes'] - (1 + self.N_inputs + self.N_total_outputs)
111 |         # Add bias node
112 |         self.addNode(is_bias_node=True)
113 | 
114 |         # Add input and output nodes
115 |         [self.addNode(is_input_node=True) for i in range(self.N_inputs)]
116 |         [self.addNode(is_output_node=True) for i in range(self.N_total_outputs)]
117 |         [self.addNode() for i in range(N_other_nodes)]
118 | 
119 |         for weight_dict in NN_dict['weights']:
120 |             self.addConnectingWeight((weight_dict['parent'], weight_dict['child']), weight_dict['weight'])
121 | 
122 |         #self.plotNetwork(show_plot=True)
123 |         pass
124 | 
125 | 
126 | 
127 | 
128 |     def addNode(self, is_input_node=False, is_output_node=False, is_bias_node=False):
129 | 
130 |         new_node = Node(len(self.node_list))
131 | 
132 |         if is_input_node:
133 |             new_node.setToInputNode()
134 |             self.input_node_indices.append(new_node.node_index)
135 | 
136 |         if is_output_node:
137 |             new_node.setToOutputNode()
138 |             self.output_node_indices.append(new_node.node_index)
139 | 
140 |         if is_bias_node:
141 |             new_node.setToBiasNode()
142 |             new_node.value = 1
143 |             self.bias_node_index = new_node.node_index
144 | 
145 |         self.node_list.append(new_node)
146 |         self.sortPropagateOrder()
147 |         return(new_node.node_index)
148 | 
149 | 
150 |     def addConnectingWeight(self, weight_parchild_tuple, val=None, std=0.1):
151 | 
152 |         # You shouldn't be calling this unless you already know it doesn't have that connection
153 |         # AND that that connection is valid (not leading from child to parent).
154 |         assert weight_parchild_tuple not in self.weights_list, 'Problem in addConnectingWeight!'
155 | 
156 |         parent_node_index, child_node_index = weight_parchild_tuple
157 |         self.node_list[child_node_index].addToInputIndices(parent_node_index)
158 |         self.node_list[parent_node_index].addToOutputWeights(child_node_index, val=val, std=std)
159 |         self.weights_list.add(weight_parchild_tuple)
160 |         self.weights_dict[weight_parchild_tuple] = self.node_list[parent_node_index].output_weights[child_node_index]
161 |         self.sortPropagateOrder()
162 | 
163 | 
164 |     def removeConnectingWeight(self, weight_parchild_tuple):
165 | 
166 |         # You shouldn't be calling this unless you already know it has that connection.
167 |         assert weight_parchild_tuple in self.weights_list, 'Problem in removeConnectingWeight!'
168 | 
169 |         parent_node_index, child_node_index = weight_parchild_tuple
170 |         self.node_list[child_node_index].removeFromInputIndices(parent_node_index)
171 |         self.node_list[parent_node_index].removeFromOutputWeights(child_node_index)
172 |         self.weights_list.remove(weight_parchild_tuple)
173 |         self.weights_dict.pop(weight_parchild_tuple)
174 |         self.sortPropagateOrder()
175 | 
176 | 
177 |     def addNodeInBetween(self, par_index, child_index):
178 |         # This adds a node in between existing nodes par_index and child_index, where the output of par_index went to child_index.
179 |         # Pass it the index of each.
180 |         self.print('adding node between nodes {} and {}'.format(par_index, child_index))
181 |         # Add node:
182 |         # New node:
183 |         old_weight = self.node_list[par_index].output_weights[child_index]
184 |         self.removeConnectingWeight((par_index, child_index))
185 |         new_node_index = self.addNode()
186 |         self.addConnectingWeight((par_index, new_node_index), val=old_weight)
187 |         self.addConnectingWeight((new_node_index, child_index), val=1)
188 | 
189 |         self.sortPropagateOrder()
190 | 
191 | 
192 |     def sortPropagateOrder(self):
193 | 
194 | 
195 |         # So something to keep in mind here is that if a node is isolated because its
196 |         # node was removed or something, it will just never enter the sort_prop list.
197 |         # That's probably fine, because it can't add anything anyway.
198 |         #
199 | 
200 |         sort_queue = Queue()
201 |         prop_order_set = set()
202 |         queue_set = set()
203 |         self.propagate_order = []
204 | 
205 |         # First add the output indices, which we'll work backwards from.
206 |         for ind in self.output_node_indices:
207 |             sort_queue.put(ind)
208 |             queue_set.add(ind)
209 | 
210 |         try:
211 | 
212 |             while not sort_queue.empty():
213 | 
214 |                 ind = sort_queue.get()
215 |                 queue_set.remove(ind)
216 |                 # Make sure all the children of this node are already in the list/set. If
217 |                 # one isn't, add this child to the queue if it's not already there,
218 |                 # (this is in case there's a "dead end" node that would never get seen by
219 |                 # tracing back from the outputs), put the node back in the queue, and continue
220 |                 # to the next loop iteration.
221 |                 #
222 |                 # You could also have it not break immediately, and add all its unseen children,
223 |                 # which might speed it up, but might also not.
224 |                 all_children_in_prop_order = True
225 | 
226 |                 for child_ind in self.node_list[ind].getOutputIndices():
227 |                     if child_ind not in prop_order_set:
228 |                         all_children_in_prop_order = False
229 |                         # Only want to add the child to the queue if it isn't already in it
230 |                         if child_ind not in queue_set:
231 |                             sort_queue.put(child_ind)
232 |                             queue_set.add(child_ind)
233 | 
234 |                         break
235 | 
236 |                 if all_children_in_prop_order:
237 |                     # This means that the node can now be added to the prop_order and set,
238 |                     # and also add its parents to the queue if they're not already.
239 |                     self.propagate_order.append(ind)
240 |                     prop_order_set.add(ind)
241 | 
242 |                     for parent_ind in self.node_list[ind].input_indices:
243 |                         if parent_ind not in queue_set:
244 |                             sort_queue.put(parent_ind)
245 |                             queue_set.add(parent_ind)
246 | 
247 |                 else:
248 |                     # If the children aren't all there already, put it back in the queue.
249 |                     sort_queue.put(ind)
250 |                     queue_set.add(ind)
251 |         except:
252 |             print('error in sortPropagateOrder()! Saving NN to file')
253 |             self.saveNetworkToFile()
254 |             self.plotNetwork(show_plot=False, save_plot=True, node_legend=True)
255 |         # Now it should be in order, where you can evaluate each node, starting with the input ones,
256 |         # and all the inputs should arrive in the right order.
257 |         self.propagate_order.reverse()
258 | 
259 | 
260 | 
261 |     def mutateAddNode(self):
262 |         if len(self.weights_list)>0:
263 |             par_index, child_index = random.choice(list(self.weights_list))
264 |             self.addNodeInBetween(par_index, child_index)
265 | 
266 | 
267 |     def mutateAddWeight(self, std=0.1):
268 |         N_attempts = 4
269 |         i = 0
270 |         while True:
271 |             if i>N_attempts:
272 |                 return(0)
273 |             else:
274 |                 i += 1
275 |             node_1_ind = random.choice(list(range(len(self.node_list))))
276 | 
277 |             # No self
278 |             node_2_options = [ind for ind in range(len(self.node_list)) if ind != node_1_ind]
279 | 
280 |             if (node_1_ind in self.input_node_indices) or (node_1_ind == self.bias_node_index):
281 |                 node_2_options = [ind for ind in node_2_options if (ind not in self.input_node_indices) and (ind != self.bias_node_index)]
282 |                 weight_connection_options = [(node_1_ind, ind) for ind in node_2_options if ((node_1_ind, ind) not in self.weights_list)]
283 | 
284 |             elif node_1_ind in self.output_node_indices:
285 |                 node_2_options = [ind for ind in node_2_options if ind not in self.output_node_indices]
286 |                 weight_connection_options = [(ind, node_1_ind) for ind in node_2_options if ((ind, node_1_ind) not in self.weights_list)]
287 | 
288 |             else:
289 |                 #if it's neither an input or output
290 | 
291 |                 # The options if node 2 is going to be the parent.
292 |                 node_2_weight_options_parent = [(ind, node_1_ind) for ind in node_2_options if (ind not in self.output_node_indices) and (not self.getsInputFrom(ind, node_1_ind))]
293 | 
294 |                 # In both cases, we need to check that either node_2 is not in prop_order
295 |                 # (meaning it can go anywhere, provided it's not i/o), OR that
296 |                 # it doesn't get indirect input from ind.
297 |                 #
298 |                 # The options if node 2 is going to be the child.
299 |                 node_2_weight_options_child = [(node_1_ind, ind) for ind in node_2_options if ((ind not in self.input_node_indices) and (ind != self.bias_node_index)) and (not self.getsInputFrom(node_1_ind, ind))]
300 | 
301 |                 # Combine them.
302 |                 weight_connection_options = node_2_weight_options_parent + node_2_weight_options_child
303 |                 weight_connection_options = [w for w in weight_connection_options if w not in self.weights_list]
304 | 
305 |             if len(weight_connection_options)==0:
306 |                 # If there aren't any options by this point, continue to try again
307 |                 continue
308 |             else:
309 |                 weight_connection_tuple = random.choice(weight_connection_options)
310 |                 break
311 | 
312 |         self.addConnectingWeight(weight_connection_tuple, val=None, std=std)
313 | 
314 | 
315 |     def mutateChangeWeight(self, std=0.1):
316 |         if len(self.weights_list)>0:
317 |             par_index, child_index = random.choice(list(self.weights_list))
318 |             self.print('changing weight between {} and {}'.format(par_index, child_index))
319 |             self.node_list[par_index].mutateOutputWeight(child_index, std=std)
320 | 
321 | 
322 |     def mutateRemoveWeight(self):
323 |         if len(self.weights_list)>0:
324 |             par_index, child_index = random.choice(list(self.weights_list))
325 |             self.print('removing weight between {} and {}'.format(par_index, child_index))
326 |             self.removeConnectingWeight((par_index, child_index))
327 | 
328 | 
329 |     def mutate(self, std=0.1):
330 | 
331 |         self.print('\n\nbefore mutate:')
332 |         if self.verbose:
333 |             self.printNetwork()
334 | 
335 |         if random.random() < self.node_add_chance:
336 |             # Add a node by splitting an existing weight
337 |             self.mutateAddNode()
338 | 
339 | 
340 |         if random.random() < self.weight_add_chance:
341 |             # Add weight between two nodes
342 |             self.mutateAddWeight(std=std)
343 | 
344 | 
345 |         if random.random() < self.weight_change_chance:
346 |             # Change weight
347 |             self.mutateChangeWeight(std=std)
348 | 
349 | 
350 |         if random.random() < self.weight_remove_chance:
351 |             # Remove weight
352 |             self.mutateRemoveWeight()
353 | 
354 | 
355 |         self.print('\nafter mutate:')
356 |         if self.verbose:
357 |             self.printNetwork()
358 | 
359 | 
360 | 
361 | 
362 |     def getsInputFrom(self, n1_index, n2_index):
363 | 
364 |         # This is to check if n1 gets input from n2, indirectly.
365 | 
366 |         n1 = self.node_list[n1_index]
367 |         n2 = self.node_list[n2_index]
368 |         lineage_q = Queue()
369 |         # You need this! Or it won't check its own parents!
370 |         lineage_q.put(n1_index)
371 |         [lineage_q.put(n) for n in n1.input_indices]
372 | 
373 |         while lineage_q.qsize() > 0:
374 |             next = lineage_q.get()
375 |             if n2_index in self.node_list[next].input_indices:
376 |                 return(True)
377 |             else:
378 |                 [lineage_q.put(n) for n in self.node_list[next].input_indices]
379 | 
380 |         return(False)
381 | 
382 | 
383 | 
384 |     def propagateNodeOutput(self, node_index):
385 | 
386 |         # This assumes that the propagate_order list is already sorted!
387 |         # If it isn't, you'll get some bad results.
388 |         node = self.node_list[node_index]
389 | 
390 |         for target_node_index in node.getOutputIndices():
391 |             self.node_list[target_node_index].addToInputsReceived(node.getValue()*node.output_weights[target_node_index])
392 | 
393 | 
394 |     def forwardPass(self, input_vec):
395 | 
396 |         self.clearAllNodes()
397 | 
398 |         # Put the input vec into the input nodes
399 |         for i, index in enumerate(self.input_node_indices):
400 |             self.node_list[index].value = input_vec[i]
401 | 
402 |         # For each node in the sorted propagate list, propagate to its children
403 |         for ind in self.propagate_order:
404 |             self.propagateNodeOutput(ind)
405 | 
406 |         output_vec = np.array([self.node_list[ind].getValue() for ind in self.output_node_indices])
407 | 
408 |         if self.action_space_type == 'discrete':
409 |             action = self.epsGreedyOutput(output_vec)
410 |         elif self.action_space_type == 'continuous':
411 |             # Need to fix if there are several cont. directions, but won't deal with that
412 |             # for now. Actually, it seems like even when it's one continuous action, you're
413 |             # supposed to supply it a list??
414 |             action = output_vec
415 | 
416 |         return(action)
417 | 
418 | 
419 |     def epsGreedyOutput(self, vec):
420 |         if random.random() < self.epsilon:
421 |             return(random.randint(0, len(vec)-1))
422 |         else:
423 |             return(self.greedyOutput(vec))
424 | 
425 | 
426 |     def greedyOutput(self, vec):
427 |         return(np.argmax(vec))
428 | 
429 | 
430 |     def softmaxOutput(self, vec):
431 |         a = np.array(vec)
432 |         a = np.exp(a)
433 |         a = a/sum(a)
434 |         return(np.random.choice(list(range(len(a))), p=a))
435 | 
436 | 
437 | 
438 |     def setMaxEpisodeSteps(self, N_steps):
439 |         self.agent.setMaxEpisodeSteps(N_steps)
440 | 
441 | 
442 |     def clearAllNodes(self):
443 |         [n.clearNode() for i,n in enumerate(self.node_list) if i!=self.bias_node_index]
444 | 
445 | 
446 |     def runEpisode(self, N_steps, **kwargs):
447 | 
448 | 
449 |         R_tot = 0
450 |         Rs = []
451 | 
452 |         show_episode = kwargs.get('show_episode', False)
453 |         record_episode = kwargs.get('record_episode', False)
454 | 
455 |         if show_episode:
456 |             self.createFig()
457 | 
458 |         if record_episode:
459 |             self.agent.setMonitorOn(show_run=show_episode)
460 | 
461 |         self.agent.initEpisode()
462 | 
463 |         for i in range(N_steps):
464 |             self.clearAllNodes()
465 | 
466 |             if i%int(N_steps/10)==0:
467 |                 self.print('R_tot = {:.3f}'.format(R_tot))
468 | 
469 | 
470 |             s = self.agent.getStateVec()
471 |             a = self.forwardPass(s)
472 |             self.print('s = {}, a = {}'.format(s, a))
473 | 
474 |             r, s, done = self.agent.iterate(a)
475 | 
476 |             R_tot += r
477 |             Rs.append(R_tot)
478 | 
479 |             if done:
480 |                 #return(R_tot)
481 |                 break
482 | 
483 |             if show_episode or record_episode:
484 |                 if self.render_type == 'matplotlib':
485 |                     self.agent.drawState(self.axes[0])
486 |                     self.axes[1].clear()
487 |                     self.axes[1].plot(Rs)
488 |                     self.fig.canvas.draw()
489 |                 elif self.render_type == 'gym':
490 |                     self.agent.drawState()
491 | 
492 | 
493 |         if record_episode:
494 |             print('R_tot = {:.3f}'.format(R_tot))
495 | 
496 |         self.print('R_tot/N_steps = {:.3f}'.format(R_tot/N_steps))
497 | 
498 |         return(R_tot)
499 | 
500 | 
501 | 
502 | 
503 |     def gaussMutate(self, std=0.1):
504 |         # This mutates ALL of a node's output weights!
505 |         for n in self.node_list:
506 |             for w in n.getOutputIndices():
507 |                 n.output_weights[w] += np.random.normal(scale=std)
508 | 
509 | 
510 | 
511 | 
512 | 
513 | 
514 | 
515 | 
516 |     def clone(self):
517 |         clone = deepcopy(self)
518 |         return(clone)
519 | 
520 | 
521 |     def createFig(self):
522 |         if self.render_type == 'matplotlib':
523 |             self.fig, self.axes = plt.subplots(1,2, figsize=(16,8))
524 |             plt.show(block=False)
525 | 
526 | 
527 | 
528 | 
529 |     def print(self, str):
530 | 
531 |         if self.verbose:
532 |             print(str)
533 | 
534 | 
535 | 
536 |     def printNetwork(self):
537 |         print('\n')
538 |         for i, n in enumerate(self.node_list):
539 |             print('\nnode ', i)
540 |             print('input indices:', n.input_indices)
541 |             print('output indices: ', n.getOutputIndices())
542 |             print('output weights: ', n.getOutputWeightStr())
543 | 
544 |         print()
545 | 
546 | 
547 |     def plotNetwork(self, show_plot=True, save_plot=False, fname=None, node_legend=False):
548 | 
549 |         fig, ax = plt.subplots(1, 1, figsize=(12,8))
550 |         DG = nx.DiGraph()
551 | 
552 |         other_node_indices = [i for i,n in enumerate(self.node_list) if ((i not in self.input_node_indices) and (i not in self.output_node_indices) and (i != self.bias_node_index))]
553 | 
554 |         DG.add_node(self.bias_node_index)
555 | 
556 |         for i in self.input_node_indices:
557 |             DG.add_node(i)
558 | 
559 |         for i in self.output_node_indices:
560 |             DG.add_node(i)
561 | 
562 |         # I think you have to add this, because if you have a node that doesn't have any connections
563 |         # and it's not I/O/B, then it will never get entered into DG without this.
564 |         for i in other_node_indices:
565 |             DG.add_node(i)
566 | 
567 |         for n in self.node_list:
568 |             for o in n.getOutputIndices():
569 |                 DG.add_edges_from([(n.node_index, o)])
570 | 
571 |         pos = nx.drawing.nx_agraph.graphviz_layout(DG, prog='dot')
572 | 
573 |         try:
574 |             nx.draw_networkx_nodes(DG, nodelist=self.input_node_indices, pos=pos, node_color='lightgreen', node_size=600)
575 |             nx.draw_networkx_nodes(DG, nodelist=self.output_node_indices, pos=pos, node_color='orange', node_size=600)
576 |             nx.draw_networkx_nodes(DG, nodelist=[self.bias_node_index], pos=pos, node_color='forestgreen', node_size=600)
577 |             nx.draw_networkx_nodes(DG, nodelist=other_node_indices, pos=pos, node_color='plum', node_size=600)
578 |         except:
579 |             print('problem drawing nx nodes. pos:')
580 |             print(pos)
581 |             exit()
582 | 
583 |         for w in self.weights_list:
584 |             weight = self.node_list[w[0]].output_weights[w[1]]
585 |             if weight < 0:
586 |                 nx.draw_networkx_edges(DG, pos=pos, edgelist=[w], width=4.0, alpha=min(abs(weight), 1), edge_color='tomato')
587 | 
588 |             if weight >= 0:
589 |                 nx.draw_networkx_edges(DG, pos=pos, edgelist=[w], width=4.0, alpha=min(abs(weight), 1), edge_color='dodgerblue')
590 | 
591 |         labels = {i:str(i) for i in range(len(self.node_list))}
592 |         nx.draw_networkx_labels(DG, pos=pos, labels=labels, font_size=14)
593 |         edge_labels = {w:'{:.2f}'.format(self.node_list[w[0]].output_weights[w[1]]) for w in self.weights_list}
594 |         nx.draw_networkx_edge_labels(DG, pos=pos, edge_labels=edge_labels, font_size=10, bbox={'alpha':0.2, 'pad':0.0}, label_pos=0.85)
595 | 
596 |         plt.xticks([])
597 |         plt.yticks([])
598 |         plt.subplots_adjust(left=.2, bottom=0, right=1, top=1, wspace=1, hspace=0)
599 |         ax.axis('off')
600 | 
601 |         if node_legend:
602 |             if (self.agent.state_labels is not None) and (self.agent.action_labels is not None):
603 |                 props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
604 | 
605 |                 percent_offset = 0.02
606 | 
607 |                 bias_str = 'Bias: node {}\n\n'.format(self.bias_node_index)
608 |                 input_str = bias_str + 'Inputs:\n\n' + '\n'.join(['node {} = {}'.format(ind, self.agent.state_labels[i]) for i, ind in enumerate(self.input_node_indices)])
609 |                 ax.text(-percent_offset, (1-3*percent_offset), input_str, transform=ax.transAxes, fontsize=10, verticalalignment='top', horizontalalignment='right', bbox=props)
610 | 
611 |                 output_str = 'Outputs:\n\n' + '\n'.join(['node {} = {}'.format(ind, self.agent.action_labels[i]) for i, ind in enumerate(self.output_node_indices)])
612 |                 ax.text(-percent_offset, 3*percent_offset, output_str, transform=ax.transAxes, fontsize=10, verticalalignment='bottom', horizontalalignment='right', bbox=props)
613 |                 textstr = input_str + '\n\n' + output_str
614 | 
615 | 
616 |                 # place a text box in upper left in axes coords
617 | 
618 |         if save_plot:
619 |             if fname is not None:
620 |                 plt.savefig(fname)
621 |             else:
622 |                 default_fname = 'misc_runs/{}_NN_{}.png'.format(self.agent_class.__name__, fst.getDateString())
623 |                 plt.savefig(default_fname)
624 | 
625 |         if show_plot:
626 |             plt.show()
627 | 
628 |         plt.close()
629 | 
630 | 
631 | 
632 | 
633 | 
634 | 
635 | 
636 | 
637 | 
638 | 
639 | 
640 | 
641 | 
642 | 
643 | 
644 | 
645 | '''
646 | SCRAP
647 | 
648 | 
649 | 
650 | 
651 | 
652 | 
653 |         # When finished, so all output nodes should be full
654 |         self.print('\n\nProp. done, output node values:')
655 |         for ind in self.output_node_indices:
656 |             self.print('Node {} output: {:.3f}'.format(ind, self.node_list[ind].value))
657 | 
658 | 
659 | 
660 | 
661 |         if par_index != self.bias_node_index:
662 |             self.node_list[self.bias_node_index].addToOutputWeights(new_node.node_index)
663 |             self.node_list[self.bias_node_index].output_weights[new_node.node_index] = 0
664 |             new_node.addToInputIndices(self.bias_node_index)
665 | 
666 | 
667 | 
668 |         # Bias node
669 |         bias_node = Node(len(self.node_list))
670 |         bias_node.setToBiasNode()
671 |         #bias_node.setOutputIndices(self.output_node_indices)
672 |         self.node_list.append(bias_node)
673 | 
674 | 
675 |         # Add input nodes
676 |         for i in range(self.N_inputs):
677 |             new_node = Node(len(self.node_list))
678 |             new_node.setOutputIndices(self.output_node_indices)
679 |             #uself.weights_list.append()
680 |             new_node.setRandomOutputWeights()
681 |             new_node.setToInputNode()
682 |             self.node_list.append(new_node)
683 | 
684 | 
685 |         # Add output nodes
686 |         for i in range(self.N_total_outputs):
687 |             new_node = Node(len(self.node_list))
688 |             new_node.setInputIndices(self.input_node_indices)
689 |             #new_node.addToInputIndices(self.bias_node_index)
690 |             new_node.setToOutputNode()
691 |             self.node_list.append(new_node)
692 | 
693 |         # Add hidden layer nodes
694 |         for i in range(self.N_init_hidden_nodes):
695 |             new_node = Node(len(self.node_list))
696 |             new_node.setInputIndices(self.input_node_indices)
697 |             #new_node.addToInputIndices(self.bias_node_index)
698 |             new_node.setOutputIndices(self.output_node_indices)
699 |             new_node.setRandomOutputWeights()
700 | 
701 |             #self.node_list[self.bias_node_index].addToOutputWeights(new_node.node_index)
702 | 
703 |             for ii in self.input_node_indices:
704 |                 self.node_list[ii].addToOutputWeights(new_node.node_index)
705 | 
706 |             for o in self.output_node_indices:
707 |                 self.node_list[o].addToInputIndices(new_node.node_index)
708 | 
709 |             self.node_list.append(new_node)
710 | 
711 |         # Set initial random output weight
712 |         for i, n in enumerate(self.node_list):
713 |             N_incoming_connect = n.getNInputs()
714 |             for j in n.input_indices:
715 |                 self.node_list[j].output_weights[i] = np.random.normal(scale=(1.0/N_incoming_connect))
716 | 
717 |         # Set all the bias weights to 0 to start.
718 |         for i in self.node_list[self.bias_node_index].getOutputIndices():
719 |             self.node_list[self.bias_node_index].output_weights[i] = 0
720 | 
721 |         self.node_list[self.bias_node_index].value = 1
722 | 
723 | 
724 | 
725 | if (node_1_ind not in self.propagate_order) and not ():
726 |     # This one is easy: if it's not in propagate_order, then it's not connected to anything else,
727 |     # so we can attach it to any other.
728 |     node_2_ind = random.choice(node_2_options)
729 |     if (node_2_ind in self.input_node_indices) or node_2_ind == self.bias_node_index:
730 |         weight_connection_tuple = (node_2_ind, node_1_ind)
731 | 
732 |     elif node_2_ind in self.output_node_indices:
733 |         weight_connection_tuple = (node_1_ind, node_2_ind)
734 | 
735 |     else:
736 |         if random.random() < 0.5:
737 |             weight_connection_tuple = (node_1_ind, node_2_ind)
738 |         else:
739 |             weight_connection_tuple = (node_2_ind, node_1_ind)
740 | 
741 |     break
742 | 
743 | 
744 | 
745 | 
746 | 
747 | 
748 | 
749 | 
750 | 
751 | 
752 | 
753 | 
754 | 
755 | '''
756 | 
757 | 
758 | #
759 | 


--------------------------------------------------------------------------------