├── .gitignore
├── misc
│   └── cover_img_neat.png
├── __pycache__
│   ├── NN.cpython-36.pyc
│   ├── EPANN.cpython-36.pyc
│   ├── Node.cpython-36.pyc
│   ├── agent1.cpython-36.pyc
│   ├── GymAgent.cpython-36.pyc
│   ├── PopTests.cpython-36.pyc
│   ├── Population.cpython-36.pyc
│   ├── Walker_1D.cpython-36.pyc
│   ├── CartPoleAgent.cpython-36.pyc
│   ├── CartpoleAgent.cpython-36.pyc
│   ├── PendulumAgent.cpython-36.pyc
│   ├── movie_combine.cpython-36.pyc
│   ├── profileOutput.cpython-36.pyc
│   ├── LunarLanderAgent.cpython-36.pyc
│   └── PuckworldAgent.cpython-36.pyc
├── scrap_old_test
│   ├── networkx_test.py
│   ├── Walker_1D.py
│   ├── LunarLanderAgent.py
│   ├── CartPoleAgent.py
│   ├── PendulumAgent.py
│   ├── PuckworldAgent.py
│   └── agent1.py
├── createEnvJson.py
├── gym_env_info.json
├── README.md
├── ev1.py
├── ablation_test.py
├── movie_combine.py
└── classes
    ├── GymAgent.py
    ├── Node.py
    ├── FileSystemTools.py
    ├── PopTests.py
    ├── Population.py
    └── EPANN.py
/.gitignore:
--------------------------------------------------------------------------------
1 | misc_runs/
2 | save_runs/
3 | scrap_old_test*
4 | misc_runs*
5 | save_runs*
6 |
7 |
--------------------------------------------------------------------------------
/misc/cover_img_neat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/misc/cover_img_neat.png
--------------------------------------------------------------------------------
/__pycache__/NN.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/NN.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/EPANN.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/EPANN.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/Node.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Node.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/agent1.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/agent1.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/GymAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/GymAgent.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/PopTests.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PopTests.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/Population.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Population.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/Walker_1D.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/Walker_1D.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/CartPoleAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/CartPoleAgent.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/CartpoleAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/CartpoleAgent.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/PendulumAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PendulumAgent.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/movie_combine.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/movie_combine.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/profileOutput.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/profileOutput.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/LunarLanderAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/LunarLanderAgent.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/PuckworldAgent.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/declanoller/neat/HEAD/__pycache__/PuckworldAgent.cpython-36.pyc
--------------------------------------------------------------------------------
/scrap_old_test/networkx_test.py:
--------------------------------------------------------------------------------
1 |
2 | from EPANN import EPANN
3 | from LunarLanderAgent import LunarLanderAgent
4 |
5 |
6 | e = EPANN(agent_class=LunarLanderAgent, render_type='gym', N_init_hidden_nodes=0, init_IO_weights=True)
7 |
8 | e.plotNetwork(show_plot=True, node_legend=True)
9 |
10 |
11 |
12 |
13 |
14 |
15 | #
16 |
--------------------------------------------------------------------------------
/createEnvJson.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | import json
4 |
5 | env_info_dict = {
6 |
7 | 'Pendulum' : {
8 | 'gym_env_name' : 'Pendulum-v0',
9 | 'state_labels' : ['cos(ang)', 'sin(ang)', 'ang_vel'],
10 | 'action_labels' : ['torque'],
11 | 'action_space_type' : 'continuous',
12 | 'max_episode_steps' : 200
13 | },
14 |
15 | 'LunarLander' : {
16 | 'gym_env_name' : 'LunarLander-v2',
17 | 'state_labels' : ['pos_x', 'pos_y', 'v_x', 'v_y', 'angle', 'v_ang'],
18 | 'action_labels' : ['nothing', 'engine_L', 'engine_main', 'engine_R'],
19 | 'action_space_type' : 'discrete',
20 | 'max_episode_steps' : 500
21 | },
22 |
23 | 'CartPole' : {
24 | 'gym_env_name' : 'CartPole-v0',
25 |         'state_labels' : ['pos_cart', 'v_cart', 'pole_angle', 'v_poletip'],
26 | 'action_labels' : ['cart_L', 'cart_R',],
27 | 'action_space_type' : 'discrete',
28 | 'max_episode_steps' : 200
29 | },
30 |
31 |
32 | }
33 |
34 |
35 | fname = 'gym_env_info.json'
36 |
37 | with open(fname, 'w') as outfile:
38 | json.dump(env_info_dict, outfile, indent=4)
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 | #
49 |
--------------------------------------------------------------------------------
/gym_env_info.json:
--------------------------------------------------------------------------------
1 | {
2 | "Pendulum": {
3 | "gym_env_name": "Pendulum-v0",
4 | "state_labels": [
5 | "cos(ang)",
6 | "sin(ang)",
7 | "ang_vel"
8 | ],
9 | "action_labels": [
10 | "torque"
11 | ],
12 | "action_space_type": "continuous",
13 | "max_episode_steps": 200
14 | },
15 | "LunarLander": {
16 | "gym_env_name": "LunarLander-v2",
17 | "state_labels": [
18 | "pos_x",
19 | "pos_y",
20 | "v_x",
21 | "v_y",
22 | "angle",
23 | "v_ang"
24 | ],
25 | "action_labels": [
26 | "nothing",
27 | "engine_L",
28 | "engine_main",
29 | "engine_R"
30 | ],
31 | "action_space_type": "discrete",
32 | "max_episode_steps": 500
33 | },
34 | "CartPole": {
35 | "gym_env_name": "CartPole-v0",
36 | "state_labels": [
37 | "pos_cart",
38 | "v_cart",
39 | "pole_angle",
40 | "v_poletip"
41 | ],
42 | "action_labels": [
43 | "cart_L",
44 | "cart_R"
45 | ],
46 | "action_space_type": "discrete",
47 | "max_episode_steps": 200
48 | }
49 | }
--------------------------------------------------------------------------------
/scrap_old_test/Walker_1D.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 |
5 | class Walker_1D:
6 |
7 |
8 | def __init__(self):
9 |
10 | self.lims = np.array([-1.0, 1.0])
11 | self.width = self.lims[1] - self.lims[0]
12 |
13 | self.step_size = self.width/50.0
14 |
15 | self.position = 0
16 | self.target_position = None
17 |
18 | self.N_state_terms = len(self.getStateVec())
19 | self.N_actions = 2
20 |
21 |
22 |
23 | def getStateVec(self):
24 | return(np.array([self.position, self.target_position]))
25 |
26 |
27 | def initEpisode(self):
28 | self.resetPosition()
29 | self.resetTarget()
30 |
31 |
32 | def resetTarget(self):
33 |
34 | x = np.random.random()
35 | self.target_position = self.lims[0] + self.width*x
36 | # print('new target pos: {:.3f}'.format(self.target_position))
37 |
38 |
39 | def resetPosition(self):
40 | self.position = 0
41 |
42 |
43 | def iterate(self, action):
44 | # Action 0 is go L, action 1 is go R.
45 | add_x = (action - 0.5)*2
46 | # maps 0,1 to -1,1
47 | self.position += add_x*self.step_size
48 | self.position = max(self.position, self.lims[0] + self.step_size)
49 | self.position = min(self.position, self.lims[1] - self.step_size)
50 | return(self.reward(), self.getStateVec(), False)
51 |
52 |
53 |
54 | def reward(self):
55 |
56 | if abs(self.position - self.target_position) <= 1.2*self.step_size:
57 | self.resetTarget()
58 | return(1.0)
59 | else:
60 | return(-0.01)
61 |
62 |
63 |
64 | def drawState(self, ax):
65 |
66 | ax.clear()
67 | ax.set_xlim(tuple(self.lims))
68 | ax.set_ylim(tuple(self.lims))
69 |
70 | ax.set_xlabel('x')
71 | ax.set_ylabel('y')
72 | ax.set_aspect('equal')
73 |
74 | ag = plt.Circle((self.position, 0), 0.03, color='tomato')
75 | ax.add_artist(ag)
76 |
77 | if self.target_position is not None:
78 | target = plt.Circle((self.target_position, 0), 0.03, color='seagreen')
79 | ax.add_artist(target)
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 | #
88 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | Experiments with playing OpenAI games with NEAT
3 | ==========================================
4 |
5 |
6 |
7 |
8 |
9 | Overview
10 | --------------------------------
11 |
12 | This project uses Kenneth Stanley's popular [NEAT framework](https://en.wikipedia.org/wiki/Neuroevolution_of_augmenting_topologies) to evolve neural networks to play OpenAI gym games. `Node` objects are the basic unit of `EPANN` objects, a collection of which forms the `Population` object. Tests to compare different population parameters can be done with `PopTests.py`.
13 |
14 |
15 | Main scripts
16 | -------------------------------
17 |
18 | These are the scripts I run; they use the classes described below and live in the main dir. A brief description of each:
19 |
20 | * `ev1.py` - Creates a `Population` object for a given agent class and evolves the population (see the usage sketch below).
21 | * `ablation_test.py` - Takes a fully formed network and repeatedly removes its smallest-magnitude connection weight, re-evaluating the network's fitness function (FF) after each removal, to find the critical connections.
22 | * `movie_combine.py` - Combines several movie files into a single movie file in a grid format.
23 | * `createEnvJson.py` - Writes a dict of info about the gym envs (or other envs) to use for labels, etc.
24 |
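For example, a typical `ev1.py` run boils down to something like the following sketch (mirroring the `Population` calls in `ev1.py`; the parameter values here are just examples):

```python
import sys
sys.path.append('./classes')
from Population import Population
from GymAgent import GymAgent

p1 = Population(agent_class=GymAgent, env_name='CartPole', N_pop=64,
                mut_type='change_topo', std=1.0, render_type='gym')
p1.evolve(N_gen=128, N_episode_steps=200, N_trials_per_agent=2)
```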
25 |
26 | Classes
27 | --------------------------------
28 |
29 | * `Node.py` - The basic unit of the network. Can be set to be an input, output, or bias node.
30 | * `EPANN.py` - The network. It starts from just input, bias, and output nodes; the number of I/O nodes is based on the number of inputs/outputs of the agent class being simulated. Nodes and connections are then added and removed via mutations. It is also responsible for running an episode of the agent class to evaluate the FF.
31 | * `Population.py` - This creates a population of `EPANN` objects, repeatedly evaluates and sorts them by their FFs, and then keeps and mutates the best of the population. This is all done via its `evolve()` function.
32 | * `PopTests.py` - This runs several evolutions of different `Population` objects to compare how different population parameters (trading off `N_gen` vs `N_pop`, for example) affect evolution.
33 | * `GymAgent.py` - This is a generic wrapper class for `gym` agents (a minimal sketch of the agent interface it provides is shown below).
34 | * `FileSystemTools.py` - A small collection of custom functions that I found myself using often, mostly string-formatting-type stuff.
35 |
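As a reference, here is a minimal sketch of the agent interface that `EPANN`/`Population` expect, pieced together from the docstrings in `classes/GymAgent.py` (the class and labels here are illustrative, not part of the repo):

```python
import numpy as np

class MinimalAgent:
    # Bare-bones agent satisfying the expected interface.

    def __init__(self, **kwargs):
        self.state_labels = ['x']          # one label per state variable
        self.action_labels = ['L', 'R']    # one label per action
        self.N_state_terms = len(self.state_labels)
        self.N_actions = len(self.action_labels)
        self.action_space_type = 'discrete'
        self.state = np.zeros(self.N_state_terms)

    def getStateVec(self):
        return self.state

    def initEpisode(self):
        self.state = np.zeros(self.N_state_terms)

    def iterate(self, action):
        # must return a (reward, state, done) tuple
        return (0.0, self.state, False)
```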
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 | #
45 |
--------------------------------------------------------------------------------
/ev1.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | from EPANN import EPANN
4 | from Population import Population
  5 | import RunTools as rt  # NOTE: RunTools.py isn't included alongside the other classes in this repo
6 | from GymAgent import GymAgent
7 | from time import time
8 | import numpy as np
9 |
10 | ea = EPANN(agent_class=GymAgent, env_name='CartPole')
11 |
12 | #ea.plotNetwork()
13 |
14 | ea.addConnectingWeight((0,4))
15 | ea.addConnectingWeight((1,4))
16 | ea.addNodeInBetween(1,4)
17 |
18 | ea.addConnectingWeight((2,5))
19 | ea.addNode()
20 | ea.addConnectingWeight((2,6))
21 | ea.addConnectingWeight((3,4))
22 | ea.addConnectingWeight((3,5))
23 | ea.addConnectingWeight((6,5))
24 | #ea.addAtomInBetween((2,5))
25 |
26 |
27 | N_tests = 100000
28 |
29 | inputs = np.random.random((N_tests, 4))
30 |
31 | st = time()
32 | for i in range(N_tests):
33 |
34 | ea.forwardPass(inputs[i])
35 |
36 |
37 | print('time elapsed:', time() - st)
38 |
39 | #ea.plotNetwork()
40 |
41 |
42 |
43 |
44 | exit()
45 |
46 |
47 | p1 = Population(agent_class=GymAgent, env_name='CartPole', N_pop=64, mut_type='change_topo', std=1.0, render_type='gym')
48 |
49 | p1.evolve(N_gen=128, N_episode_steps=200, N_trials_per_agent=2, N_runs_with_best=2, record_final_runs=False, show_final_runs=False)
50 |
51 | exit(0)
52 |
53 |
54 |
55 |
56 | evolve_params = {
57 | 'N_runs' : 3,
58 | 'agent_class' : GymAgent,
59 | 'env_name' : 'LunarLander',
60 | 'N_pop' : 64,
61 | 'mut_type' : 'change_topo',
62 | 'std' : [0.01, 0.1, 1, 10],
63 | 'N_gen' : 256,
64 | 'N_trials_per_agent' : 2,
65 | 'N_runs_with_best' : 9,
66 | 'record_final_runs' : True,
67 | 'show_final_runs' : False
68 | }
69 |
70 |
71 | rt.varyParam(object_class=Population, run_fn=Population.evolve, run_result_var='best_individ_avg_score', **evolve_params)
72 |
73 | exit()
74 |
75 |
76 |
77 |
78 | evolve_params = {
79 | 'N_runs' : 3,
80 | 'agent_class' : GymAgent,
81 | 'env_name' : 'LunarLander',
82 | 'N_pop' : 64,
83 | 'mut_type' : 'change_topo',
84 | 'std' : [0.01, 0.1, 1.0, 10.0],
85 | 'N_gen' : 256,
86 | 'N_trials_per_agent' : 2,
87 | 'N_runs_with_best' : 9,
88 | 'record_final_runs' : True,
89 | 'show_final_runs' : False
90 | }
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 | e = EPANN(agent_class=PendulumAgent)  # (unreachable scratch code; PendulumAgent isn't imported above)
101 |
102 | e.loadNetworkFromFile(
103 | '/home/declan/Documents/code/evo1/misc_runs/evolve_22-01-2019_18-01-04__PendulumAgent' +
104 | '/' + 'bestNN_PendulumAgent_22-01-2019_18-01-04' + '.json'
105 | )
106 |
107 | exit(0)
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 | #
116 |
--------------------------------------------------------------------------------
/scrap_old_test/LunarLanderAgent.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import gym
4 | from gym import wrappers
5 | import FileSystemTools as fst
6 |
7 | '''
8 |
9 | need to provide:
10 |
11 | --state labels (for each state var)
12 | --action labels (for each action var)
13 | --N_state_terms
14 | --N_actions
15 |
16 | functions:
17 |
18 | --getStateVec()
19 | --initEpisode()
20 | --iterate() (returns a tuple of (reward, state, boolean isDone))
21 |
22 | '''
23 |
24 |
25 |
26 | class LunarLanderAgent:
27 |
28 |
29 | def __init__(self, **kwargs):
30 |
31 | self.env = gym.make('LunarLander-v2')
32 | gym.logger.set_level(40)
33 | self.state_labels = ['pos_x', 'pos_y', 'v_x', 'v_y', 'angle', 'v_ang']
34 | self.action_labels = ['nothing', 'engine_L', 'engine_main', 'engine_R',]
35 | # Last two states are whether the legs are touching the ground or not.
36 | # I'm not including them here.
37 | self.N_state_terms = 6
38 | self.N_actions = self.env.action_space.n
39 | self.action_space_type = 'discrete'
40 | self.state = self.env.reset()
41 | dt = fst.getDateString()
42 | self.base_name = f'LunarLander_{dt}'
43 | self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/')
44 | self.monitor_is_on = False
45 |
46 |
47 |
48 |
49 | def setMonitorOn(self):
50 | # It seems like when I call this, it gives a warning about the env not being
51 | # made with gym.make (which it is...), but if I call it only once for the same
52 | # agent, it doesn't run it every time I call it?
53 | #if not self.monitor_is_on:
54 | #
55 | # Also, it seems like you can't record the episode without showing it on the screen.
56 | # See https://github.com/openai/gym/issues/347 maybe?
57 | if True:
58 | self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name)
59 | self.env = wrappers.Monitor(self.env, self.record_dir)
60 | self.monitor_is_on = True
61 |
62 |
63 | def getStateVec(self):
64 | return(self.state[:self.N_state_terms])
65 |
66 |
67 | def initEpisode(self):
68 | self.state = self.env.reset()
69 |
70 |
71 | def iterate(self, action):
72 |         # Actions: 0 = do nothing, 1 = fire engine_L, 2 = fire engine_main, 3 = fire engine_R.
73 | observation, reward, done, info = self.env.step(action)
74 | self.state = observation
75 |
76 | return(reward, self.state, done)
77 |
78 |
79 |
80 |
81 |
82 |
83 | def drawState(self):
84 |
85 | self.env.render()
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | #
94 |
--------------------------------------------------------------------------------
/scrap_old_test/CartPoleAgent.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import gym
4 | from gym import wrappers
5 | import FileSystemTools as fst
6 |
7 | '''
8 |
9 | need to provide:
10 |
11 | --state labels (for each state var)
12 | --action labels (for each action var)
13 | --N_state_terms
14 | --N_actions
15 | --action_space_type
16 |
17 | functions:
18 |
19 | --getStateVec()
20 | --initEpisode()
21 | --iterate() (returns a tuple of (reward, state, boolean isDone))
22 |
23 | '''
24 |
25 |
26 |
27 | class CartPoleAgent:
28 |
29 |
30 | def __init__(self, **kwargs):
31 |
32 | self.env = gym.make('CartPole-v0')
33 | gym.logger.set_level(40)
 34 |         self.state_labels = ['pos_cart', 'v_cart', 'pole_angle', 'v_poletip']
 35 |         self.action_labels = ['cart_L', 'cart_R']
 36 |         # All four state terms from the env are used here:
 37 |         # cart position/velocity and pole angle/tip velocity.
38 | self.N_state_terms = len(self.env.reset())
39 | self.N_actions = self.env.action_space.n
40 | self.action_space_type = 'discrete'
41 | self.state = self.env.reset()
42 | dt = fst.getDateString()
43 | self.base_name = f'CartPole_{dt}'
44 | self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/')
45 | self.monitor_is_on = False
46 |
47 |
48 | def setMaxEpisodeSteps(self, N_steps):
49 |
50 | self.env._max_episode_steps = N_steps
51 | self.env.spec.max_episode_steps = N_steps
52 | self.env.spec.timestep_limit = N_steps
53 |
54 |
55 | def setMonitorOn(self):
56 | # It seems like when I call this, it gives a warning about the env not being
57 | # made with gym.make (which it is...), but if I call it only once for the same
58 | # agent, it doesn't run it every time I call it?
59 | #if not self.monitor_is_on:
60 | #
61 | # Also, it seems like you can't record the episode without showing it on the screen.
62 | # See https://github.com/openai/gym/issues/347 maybe?
63 | if True:
64 | self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name)
65 | self.env = wrappers.Monitor(self.env, self.record_dir)
66 | self.monitor_is_on = True
67 |
68 |
69 | def getStateVec(self):
70 | return(self.state[:self.N_state_terms])
71 |
72 |
73 | def initEpisode(self):
74 | self.state = self.env.reset()
75 |
76 |
77 | def iterate(self, action):
78 | # Action 0 is go L, action 1 is go R.
79 | observation, reward, done, info = self.env.step(action)
80 | self.state = observation
81 |
82 | return(reward, self.state, done)
83 |
84 |
85 |
86 |
87 |
88 |
89 | def drawState(self):
90 |
91 | self.env.render()
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 | #
100 |
--------------------------------------------------------------------------------
/scrap_old_test/PendulumAgent.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import gym
4 | from gym import wrappers
5 | import FileSystemTools as fst
6 |
7 | '''
8 |
9 | need to provide:
10 |
11 | --state labels (for each state var)
12 | --action labels (for each action var)
13 | --N_state_terms
14 | --N_actions
15 |
16 | functions:
17 |
18 | --getStateVec()
19 | --initEpisode()
20 | --iterate() (returns a tuple of (reward, state, boolean isDone))
21 | --setMaxEpisodeSteps()
22 |
23 | '''
24 |
25 |
26 |
27 | class PendulumAgent:
28 |
29 |
30 | def __init__(self, **kwargs):
31 |
32 | self.env = gym.make('Pendulum-v0')
33 | gym.logger.set_level(40)
34 | self.state_labels = ['cos(ang)', 'sin(ang)', 'ang_vel']
35 | self.action_labels = ['torque']
 36 |         # The env state is [cos(ang), sin(ang), ang_vel];
 37 |         # all three terms are used here.
38 | self.N_state_terms = len(self.env.reset())
39 | self.N_actions = 1
40 | self.action_space_type = 'continuous'
41 | self.state = self.env.reset()
42 | dt = fst.getDateString()
43 | self.base_name = f'Pendulum_{dt}'
44 | self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/')
45 | self.monitor_is_on = False
46 |
47 |
48 |
49 | def setMaxEpisodeSteps(self, N_steps):
50 |
51 | self.env._max_episode_steps = N_steps
52 | self.env.spec.max_episode_steps = N_steps
53 | self.env.spec.timestep_limit = N_steps
54 |
55 |
56 | def closeEnv(self):
57 | # This doesn't seem to be a good idea to use with monitor?
58 | self.env.close()
59 |
60 |
61 | def setMonitorOn(self, show_run=True):
62 | # It seems like when I call this, it gives a warning about the env not being
63 | # made with gym.make (which it is...), but if I call it only once for the same
64 | # agent, it doesn't run it every time I call it?
65 | #if not self.monitor_is_on:
66 | #
67 | # Also, it seems like you can't record the episode without showing it on the screen.
68 | # See https://github.com/openai/gym/issues/347 maybe?
69 | if True:
70 | self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name)
71 | if show_run:
72 | self.env = wrappers.Monitor(self.env, self.record_dir)
73 | else:
74 | self.env = wrappers.Monitor(self.env, self.record_dir, video_callable=False, force=True)
75 | self.monitor_is_on = True
76 |
77 |
78 | def getStateVec(self):
79 | return(self.state[:self.N_state_terms])
80 |
81 |
82 | def initEpisode(self):
83 | self.state = self.env.reset()
84 |
85 |
86 | def iterate(self, action):
 87 |         # Pendulum takes a single continuous action (the applied torque).
88 | observation, reward, done, info = self.env.step(action)
89 | self.state = observation
90 |
91 | return(reward, self.state, done)
92 |
93 |
94 |
95 |
96 |
97 |
98 | def drawState(self):
99 |
100 | self.env.render()
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 | #
109 |
--------------------------------------------------------------------------------
/ablation_test.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | from EPANN import EPANN
4 | from GymAgent import GymAgent
5 | import matplotlib.pyplot as plt
6 | import FileSystemTools as fst
7 | import numpy as np
8 |
9 | params = {}
10 | params['env_name'] = 'Pendulum'
11 |
12 | e = EPANN(agent_class=GymAgent, env_name=params['env_name'])
13 |
14 | path = '/home/declan/Documents/code/evo1/save_runs/evolve_23-01-2019_18-45-18__GymAgentPendulum_good'
15 | params['NN_file'] = fst.combineDirAndFile(path, 'bestNN_GymAgent_23-01-2019_18-45-18' + '.json')
16 |
17 | e.loadNetworkFromFile(params['NN_file'])
18 |
19 | datetime_str = fst.getDateString()
20 | output_dir = fst.combineDirAndFile('misc_runs', 'ablation_{}_{}'.format(datetime_str, params['env_name']))  # avoids shadowing the builtin dir()
21 | fst.makeDir(output_dir)
22 | plot_dir = fst.makeDir(fst.combineDirAndFile(output_dir, 'plots'))
23 |
24 | log_output_str = ''
25 |
26 | params['N_runs_per_NN'] = 50
27 | params['N_episode_steps'] = e.agent.max_episode_steps
28 |
29 | params['N_weights_to_remove'] = len(e.weights_list)
30 |
31 | ablation_FF_mean_std = []
32 |
33 | for w_removed in range(params['N_weights_to_remove']):
34 |
35 | # Run the ablation for several times to get stats
36 | ablation_scores = []
37 | for run in range(params['N_runs_per_NN']):
38 | ablation_scores.append(e.runEpisode(params['N_episode_steps']))
39 |
40 | # Add the mean and std
41 | ablation_FF_mean_std.append([w_removed, np.mean(ablation_scores), np.std(ablation_scores)])
42 |
43 | # Save what the NN currently looks like
44 | NN_save_fname = fst.combineDirAndFile(plot_dir, 'NN_plot_{}w_removed.png'.format(w_removed))
45 | e.plotNetwork(show_plot=False, save_plot=True, fname=NN_save_fname, node_legend=True)
46 |
47 | # Remove the next smallest weight
48 | smallest_weight_connection = min(e.weights_dict, key=lambda x: abs(e.weights_dict.get(x)))
49 | remove_str = 'Removing weight {} that has value {:.3f}\n'.format(smallest_weight_connection, e.weights_dict[smallest_weight_connection])
50 | print(remove_str)
51 | log_output_str += remove_str
52 | e.removeConnectingWeight(smallest_weight_connection)
53 |
54 |
55 |
56 | # Save params
57 | fst.writeDictToFile(params, fst.combineDirAndFile(output_dir, 'Params_logfile_{}.log'.format(datetime_str)))
58 |
59 | # Save weight order removal
60 | removal_log_fname = fst.combineDirAndFile(output_dir, 'Weight_remove_order_{}.txt'.format(datetime_str))
61 | with open(removal_log_fname, 'w+') as f:
62 | f.write(log_output_str)
63 |
64 | # Plot the mean and std FF as a function of removing weights
65 | ablation_FF_mean_std = np.array(ablation_FF_mean_std)
66 | weights_removed = ablation_FF_mean_std[:, 0]
67 | FF_mean = ablation_FF_mean_std[:, 1]
68 | FF_std = ablation_FF_mean_std[:, 2]
69 | plt.fill_between(
70 | np.array(range(len(FF_mean))),
71 | FF_mean - FF_std,
72 | FF_mean + FF_std,
73 | facecolor='dodgerblue', alpha=0.5)
74 |
75 | plt.plot(FF_mean, color='mediumblue')
76 | plt.xlabel('# weights removed')
77 | plt.ylabel('FF')
78 | plt.title('Ablation test, FF over {} episodes each'.format(params['N_runs_per_NN']))
79 | fname = fst.combineDirAndFile(output_dir, '{}_{}.png'.format('ablation_FF_mean-std_plot', datetime_str))
80 | plt.savefig(fname)
81 |
82 | # Save mean/std
83 | fname = fst.combineDirAndFile(output_dir, '{}_{}.txt'.format('ablation_FF_mean-std', datetime_str))
84 | np.savetxt(fname, ablation_FF_mean_std, fmt='%.4f')
85 |
86 |
87 |
88 |
89 | #
90 |
--------------------------------------------------------------------------------
/movie_combine.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | from moviepy.editor import VideoFileClip, clips_array, vfx
4 | import argparse
5 | import FileSystemTools as fst
6 | import glob
7 | import subprocess
8 | import os
9 |
10 |
11 | def combineMovieFiles(**kwargs):
12 |
13 | path = kwargs.get('path', None)
14 | file_type = kwargs.get('file_type', 'mp4')
15 | grid_size = kwargs.get('grid_size', '1x1')
16 | make_gif = kwargs.get('make_gif', True)
17 |
18 | # get the files with the video clip extension type
19 | file_list = glob.glob(fst.addTrailingSlashIfNeeded(path) + '*' + file_type)
20 | print('{} files of type {} found'.format(len(file_list), file_type))
21 |
22 | # make sure you've passed a grid size argument
 23 |     assert grid_size != '0', 'need to provide a grid_size arg of the form HxW, e.g. 2x2'
24 |
25 | try:
26 | grid_dims = [int(y) for y in grid_size.split('x')]
27 | grid_height, grid_width = grid_dims[0], grid_dims[1]
28 | N_movie_panels = grid_height*grid_width
29 | print('need {} movie files for a grid of size {}'.format(N_movie_panels, grid_size))
 30 |     except (ValueError, IndexError):
31 | print('something wrong with grid_size argument, should be of form 5x8 (or similar)')
32 | exit()
33 |
34 | # take only the first N video files, no choosing process. It will use ones created
35 | # from running this program previously if they're there, so be careful.
36 | files_used = file_list[:N_movie_panels]
37 |
38 | clip_list = []
39 | clip_matrix = []
40 |
41 | # create a list of the video file clip objects, with a small margin around each
42 | for f in files_used:
43 | clip1 = VideoFileClip(f).margin(10)
44 | #clip1 = clip1.resize(0.50)
45 | clip_list.append(clip1)
46 |
47 | # put them into a list of lists, ie, a matrix, in the shape you want them to finally be
48 | for y in range(grid_height):
49 | temp_list = []
50 | for x in range(grid_width):
51 | temp_list.append(clip_list[y*grid_width + x])
52 |
53 | clip_matrix.append(temp_list)
54 |
55 | print('size of clip_matrix:', len(clip_matrix), len(clip_matrix[0]))
56 | final_clip = clips_array(clip_matrix) # put the clips side by side
57 |
58 | # fname stuff
59 | dt_string = fst.getDateString()
60 | base_fname = 'COMBINED_{}_{}'.format(grid_size, dt_string)
61 | movie_output_fname = fst.combineDirAndFile(path, '{}.{}'.format(base_fname, file_type))
62 |
63 | final_clip.write_videofile(movie_output_fname) # create the combined video file!
64 |
65 | if make_gif:
66 |
67 | px_size = 1260
68 | fps = 30
69 |
70 | gif_output_fname = fst.combineDirAndFile(path, '{}.gif'.format(base_fname))
71 |
72 | palette_fname = 'palette.png'
73 |
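        # Two-pass gif creation (the standard ffmpeg palettegen/paletteuse workflow):
        # first derive an optimized 256-color palette from the movie, then render the
        # gif through that palette, which gives much better colors than the default quantization.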
74 | create_palette_cmd = 'ffmpeg -y -i {} -vf fps={},scale={}:-1:flags=lanczos,palettegen {}'.format(movie_output_fname, fps, px_size, palette_fname)
75 | create_gif_cmd = 'ffmpeg -i {} -i {} -filter_complex "fps={},scale={}:-1:flags=lanczos[x];[x][1:v]paletteuse" {}'.format(movie_output_fname, palette_fname, fps, px_size, gif_output_fname)
76 |
77 | os.system(create_palette_cmd)
78 | os.system(create_gif_cmd)
79 |
80 | remove_palette_cmd = f'rm {palette_fname}'
81 | remove_movie_cmd = f'rm {movie_output_fname}'
82 |
83 | os.system(remove_palette_cmd)
84 | os.system(remove_movie_cmd)
85 |
86 |
87 | if __name__ == '__main__':
88 |
89 | # arguments to be read in via CLI
90 | parser = argparse.ArgumentParser()
91 | parser.add_argument('path')
92 | parser.add_argument('--grid_size', default='0')
93 | parser.add_argument('--file_type', default='mp4')
94 | parser.add_argument('--gif', action='store_true', default=False)
95 | args = parser.parse_args()
96 |
97 | kwargs = {}
98 | kwargs['path'] = args.path
99 | kwargs['file_type'] = args.file_type
100 | kwargs['grid_size'] = args.grid_size
101 | kwargs['make_gif'] = args.gif
102 |
103 | combineMovieFiles(**kwargs)
104 |
105 |
106 |
107 |
108 |
109 | #
110 |
--------------------------------------------------------------------------------
/classes/GymAgent.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | import numpy as np
4 | import gym
5 | from gym import wrappers
6 | import FileSystemTools as fst
7 | import json
8 |
9 |
10 | '''
11 |
12 | This is a generalized agent for OpenAI gym environments.
 13 | I'm doing it because I previously had to create a different agent class for each
 14 | environment, when they really only differ in a few specifics. So now you just pass
 15 | env_name in the kwargs, and it looks up the right info for that env in a json file.
16 |
17 | Here, the env_name you pass it will be something like 'Pendulum', not Pendulum-v0,
18 | because I don't want to have to deal with remembering versions.
19 |
20 | See createEnvJson.py and loadEnvJson() for details.
21 |
22 | need to provide:
23 |
24 | --state labels (for each state var)
25 | --action labels (for each action var)
26 | --gym_env_name
27 | --action_space_type
28 |
29 | functions:
30 |
31 | --getStateVec()
32 | --initEpisode()
33 | --iterate() (returns a tuple of (reward, state, boolean isDone))
34 | --setMaxEpisodeSteps()
35 |
36 | '''
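# Quick usage sketch (illustrative only, not executed as part of this module):
#
#     agent = GymAgent(env_name='CartPole')   # looks up 'CartPole' in gym_env_info.json
#     agent.initEpisode()
#     reward, state, done = agent.iterate(0)  # one env step with action 0 ('cart_L')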
37 |
38 |
39 |
40 | class GymAgent:
41 |
42 |
43 | def __init__(self, **kwargs):
44 |
45 | self.env_name = kwargs.get('env_name', None)
46 | assert self.env_name is not None, 'Need to provide an env_name argument!'
47 |
48 | # Load all the properties for this env.
49 | self.loadEnvJson(self.env_name)
50 | # Create the env
51 | self.env = gym.make(self.gym_env_name)
52 | self.setMaxEpisodeSteps(self.max_episode_steps)
53 | gym.logger.set_level(40)
54 |
55 | self.state = self.env.reset() # Should I be doing this here? sometimes trouble with resetting when done=False
56 | dt = fst.getDateString()
57 | self.base_name = f'{self.env_name}_{dt}'
58 | self.run_dir = kwargs.get('run_dir', '/home/declan/Documents/code/evo1/misc_runs/')
59 | self.monitor_is_on = False
60 |
61 |
62 | def setMaxEpisodeSteps(self, N_steps):
63 |
64 | self.env._max_episode_steps = N_steps
65 | self.env.spec.max_episode_steps = N_steps
66 | self.env.spec.timestep_limit = N_steps
67 |
68 |
69 | def closeEnv(self):
70 | # This doesn't seem to be a good idea to use with monitor?
71 | self.env.close()
72 | #self.env.render(close=True)
73 |
74 |
75 | def setMonitorOn(self, show_run=True):
76 | # It seems like when I call this, it gives a warning about the env not being
77 | # made with gym.make (which it is...), but if I call it only once for the same
78 | # agent, it doesn't run it every time I call it?
79 | #if not self.monitor_is_on:
80 | #
81 | # Also, it seems like you can't record the episode without showing it on the screen.
82 | # See https://github.com/openai/gym/issues/347 maybe?
83 |
84 | self.record_dir = fst.combineDirAndFile(self.run_dir, self.base_name)
85 | #if show_run:
86 | if True:
87 | self.env = wrappers.Monitor(self.env, self.record_dir)
88 | else:
89 | self.env = wrappers.Monitor(self.env, self.record_dir, video_callable=False, force=True)
90 | self.monitor_is_on = True
91 |
92 |
93 | def getStateVec(self):
94 | return(self.state[:self.N_state_terms])
95 |
96 |
97 | def initEpisode(self):
98 | self.state = self.env.reset()
99 |
100 |
101 | def iterate(self, action):
102 |         # Step the env with the given action and return (reward, state, done).
103 | observation, reward, done, info = self.env.step(action)
104 | self.state = observation
105 |
106 | return(reward, self.state, done)
107 |
108 |
109 |
110 | def drawState(self):
111 | self.env.render()
112 |
113 |
114 | def loadEnvJson(self, env_name):
115 |
116 | with open('gym_env_info.json') as json_file:
117 | env_info_dict = json.load(json_file)
118 |
119 | env_info = env_info_dict[env_name]
120 |
121 | self.gym_env_name = env_info['gym_env_name']
122 | self.state_labels = env_info['state_labels']
123 | self.action_labels = env_info['action_labels']
124 | self.action_space_type = env_info['action_space_type']
125 | self.max_episode_steps = env_info['max_episode_steps']
126 | self.N_state_terms = len(self.state_labels)
127 | self.N_actions = len(self.action_labels)
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 | #
136 |
--------------------------------------------------------------------------------
/classes/Node.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | import numpy as np
4 | from math import exp, tanh
5 | from copy import copy
6 |
7 |
8 | class Node:
9 |
10 | def __init__(self, node_index):
11 |
12 | self.is_input_node = False
13 | self.is_output_node = False
14 | self.is_bias_node = False
15 | self.is_memory_node = False
16 |
17 | self.node_index = node_index
18 |
19 | self.input_indices = []
20 |
21 | self.inputs_received = []
22 |
23 | self.output_weights = {}
24 |
25 | self.value = None
26 |
27 |
28 | def setToInputNode(self):
29 | self.is_input_node = True
30 |
31 |
32 | def setToOutputNode(self):
33 | self.is_output_node = True
34 |
35 |
36 | def setToBiasNode(self):
37 | self.is_bias_node = True
38 | self.value = 1.0
39 |
40 |
41 | def setToMemoryNode(self):
42 | self.is_memory_node = True
43 | self.value = 0.0
44 |
45 |
46 | def getValue(self):
47 |
48 | if self.value is not None:
49 | return(self.value)
50 | else:
51 | if self.is_output_node:
52 | tot = sum(self.inputs_received)
53 | self.value = tot
54 | return(self.value)
55 | elif self.is_bias_node:
56 | pass
57 | elif self.is_memory_node:
58 | pass
59 | elif self.is_input_node:
60 | # For now, I'm just gonna set the input nodes directly via the .output value.
61 | return(self.value)
62 | else:
63 | tot = sum(self.inputs_received)
64 | self.value = self.nonlinear(tot)
65 | return(self.value)
66 |
67 |
68 |
69 | def calculateNodeValue(self):
70 | if self.is_output_node:
 71 |             tot = sum(self.inputs_received)  # inputs_received is a list, not a dict
72 | self.value = tot
73 | elif self.is_bias_node:
74 | pass
75 | elif self.is_memory_node:
76 | pass
77 | elif self.is_input_node:
78 | # For now, I'm just gonna set the input nodes directly via the .output value.
79 | pass
80 | else:
 81 |             tot = sum(self.inputs_received)  # inputs_received is a list, not a dict
82 | self.value = self.nonlinear(tot)
83 |
84 |
85 | def clearInputs(self):
86 | if not self.is_input_node:
87 | self.inputs_received = []
88 |
89 |
90 | def clearNode(self):
91 | self.clearInputs()
92 | self.value = None
93 |
94 |
95 | def setRandomOutputWeights(self):
96 | weights = np.random.normal(size=self.getNOutputs(), scale=0.1)
97 | self.output_weights = dict(zip(self.getOutputIndices(), weights))
98 |
99 |
100 | def removeFromInputIndices(self, ind):
101 | self.input_indices.remove(ind)
102 |
103 | def removeFromOutputWeights(self, ind):
104 | del self.output_weights[ind]
105 |
106 | def addToInputIndices(self, ind):
107 | self.input_indices.append(ind)
108 |
109 |
110 | def changeOutputWeightInd(self, old_ind, new_ind):
111 | weight = self.output_weights.pop(old_ind)
112 | self.output_weights[new_ind] = weight
113 |
114 | def addToOutputWeights(self, new_output_ind, val=None, std=0.1):
115 | if val is not None:
116 | self.output_weights[new_output_ind] = val
117 | else:
118 | self.output_weights[new_output_ind] = np.random.normal(scale=std)
119 |
120 |
121 | def mutateOutputWeight(self, ind, std=0.1):
122 | self.output_weights[ind] += np.random.normal(scale=std)
123 |
124 |
125 | def getOutputIndices(self):
126 | return(list(self.output_weights.keys()))
127 |
128 |
129 | def getNInputs(self):
130 | return(len(self.input_indices))
131 |
132 | def getNOutputs(self):
133 | return(len(self.output_weights))
134 |
135 |
136 | def getOutputWeightStr(self):
137 | w_str = ', '.join(['{}: {:.3f}'.format(k,v) for k,v in self.output_weights.items()])
138 | s = '[{}]'.format(w_str)
139 | return(s)
140 |
141 | def setOutputIndices(self, ind_list):
142 | self.output_weights = dict(zip(copy(ind_list), [0]*len(ind_list)))
143 |
144 |
145 | def setInputIndices(self, ind_list):
146 | self.input_indices = copy(ind_list)
147 | self.clearInputs()
148 |
149 |
150 | def allInputsReceived(self):
151 |
152 | #if self.input_indices is None:
153 | if len(self.input_indices) == 0:
154 | return(True)
155 |
156 |         # checks whether a value has been received from every input; inputs_received
157 |         # is a list (see addToInputsReceived), so compare its length to input_indices.
158 |         if len(self.inputs_received) >= len(self.input_indices):
159 |             return(True)
160 |         else:
161 |             return(False)
162 |
163 |
164 |
165 |
166 | def addToInputsReceived(self, val):
167 | self.inputs_received.append(val)
168 |
169 |
170 | def nonlinear(self, x):
171 |
172 |         # Using tanh for now; sigmoid and relu alternatives are left commented out.
173 |
174 | #sigmoid = 1/(1 + exp(-x))
175 | #relu = max(0, x)
176 | tanh_x = tanh(x)
177 |
178 | return(tanh_x)
179 |
180 |
181 | #
182 |
--------------------------------------------------------------------------------
/scrap_old_test/PuckworldAgent.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from math import sqrt
4 |
5 |
6 |
7 | class PuckworldAgent:
8 |
9 |
10 | def __init__(self, **kwargs):
11 |
12 | self.xlims = kwargs.get('xlims', np.array([-0.5,0.5]))
13 | self.ylims = kwargs.get('ylims', np.array([-0.5,0.5]))
14 | self.lims = np.array((self.xlims,self.ylims))
15 | self.max_dist = sqrt(np.ptp(self.xlims)**2 + np.ptp(self.ylims)**2)
16 | self.a = kwargs.get('a',1.0)
17 | self.drag = kwargs.get('drag', 0.5)
18 | self.time_step = kwargs.get('dt',10**-1)
19 | self.reward_type = kwargs.get('reward','sparse')
20 |
21 | self.passed_params = {}
22 | check_params = ['a', 'drag', 'dt', 'reward']
23 | for param in check_params:
24 | if kwargs.get(param, None) is not None:
25 | self.passed_params[param] = kwargs.get(param, None)
26 |
27 | self.N_actions = 4
28 |
29 |
30 | self.circ_rad = np.ptp(self.xlims)/20.0
31 | self.target_rad = 1*self.circ_rad
32 | self.resetTarget()
33 |
34 | self.pos0 = np.array([self.xlims.mean()/2.0,self.ylims.mean()/2.0])
35 | self.v0 = np.array([0.0,0.0])
36 | self.resetStateValues()
37 | self.accel_array = np.array([[0,1],[0,-1],[-1,0],[1,0]])
38 |
39 |
40 |
41 | self.N_state_terms = len(self.getStateVec())
42 |
43 |
44 |
45 | def puckTargetDist(self):
46 | return(sqrt(np.sum((self.pos-self.target)**2)))
47 |
48 |
49 | def addToHist(self):
50 | self.pos_hist = np.concatenate((self.pos_hist,[self.pos]))
51 | self.v_hist = np.concatenate((self.v_hist,[self.v]))
52 | self.t.append(self.t[-1] + self.time_step)
53 | self.r_hist.append(self.reward())
54 |
55 |
56 | def resetTarget(self):
57 |
58 | self.target = self.target_rad + self.lims[:,0] + np.random.random((2,))*(np.ptp(self.lims,axis=1)-2*self.target_rad)
59 |
60 |
61 | def iterateEuler(self,action):
62 |
 63 |         # This uses the Euler-Cromer method to move.
 64 |
 65 |         # Right now I'm just going to make it sit against a wall if it reaches the
 66 |         # boundary, but it might be cool to make periodic boundary conditions, to
 67 |         # see if it would learn to zoom around them.
68 |
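        # Euler-Cromer update: v_{n+1} = v_n + a_n*dt, then x_{n+1} = x_n + v_{n+1}*dt.
        # Note the position update uses the *new* velocity; that's what makes this
        # Euler-Cromer rather than plain (forward) Euler.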
69 | a = self.actionToAccel(action) - self.drag*self.v
70 |
71 | v_next = self.v + a*self.time_step
72 | pos_next = self.pos + v_next*self.time_step
73 |
74 | #To handle the walls
75 | for i in [0,1]:
76 | if pos_next[i] < (self.lims[i,0] + self.circ_rad):
77 | pos_next[i] = self.lims[i,0] + self.circ_rad
78 | # This makes it "bounce" off the wall, so it keeps momentum.
79 | #v_next[i] = -v_next[i]
80 | # This makes it "stick" to the wall.
81 | v_next[i] = 0
82 |
83 | if pos_next[i] > (self.lims[i,1] - self.circ_rad):
84 | pos_next[i] = self.lims[i,1] - self.circ_rad
85 | #v_next[i] = -v_next[i]
86 | v_next[i] = 0
87 |
88 | self.pos = pos_next
89 | self.v = v_next
90 | self.addToHist()
91 |
92 |
93 | def actionToAccel(self,action):
94 | self.a_hist.append(action)
95 | return(self.a*self.accel_array[action])
96 |
97 |
98 |
99 | ###################### Required agent functions
100 |
101 |
102 | def getPassedParams(self):
103 | #This returns a dict of params that were passed to the agent, that apply to the agent.
104 | #So if you pass it a param for 'reward', it will return that, but it won't return the
105 | #default val if you didn't pass it.
106 | return(self.passed_params)
107 |
108 |
109 | def getStateVec(self):
110 | assert self.target is not None, 'Need target to get state vec'
111 | return(np.concatenate((self.pos,self.v,self.target)))
112 |
113 |
114 | def reward(self):
115 |
116 | assert self.target is not None, 'Need a target'
117 |
118 | max_R = 1
119 |
120 | if self.reward_type == 'sparse':
121 | if self.puckTargetDist() <= (self.target_rad + self.circ_rad):
122 | return(max_R)
123 | else:
124 | return(-0.01)
125 |
126 | if self.reward_type == 'shaped':
127 | #return(max_R*(self.max_dist/2.0 - self.puckTargetDist()))
128 | #These numbers will probably have to change if a, dt, or the dimensions change.
129 | return(-0.5*self.puckTargetDist() + 0.4)
130 |
131 |
132 | def initEpisode(self):
133 | self.resetStateValues()
134 | self.resetTarget()
135 |
136 |
137 | def iterate(self,action):
138 | self.iterateEuler(action)
139 |
140 | r = self.reward()
141 | if r > 0:
142 | self.resetTarget()
143 |
144 | return(r, self.getStateVec(), False)
145 |
146 |
147 | def resetStateValues(self):
148 |
149 | self.pos = self.pos0
150 | self.v = self.v0
151 |
152 | self.pos_hist = np.array([self.pos])
153 | self.v_hist = np.array([self.v])
154 | self.t = [0]
155 | self.a_hist = [0]
156 | self.r_hist = []
157 |
158 |
159 | def drawState(self,ax):
160 |
161 | ax.clear()
162 | ax.set_xlim(tuple(self.xlims))
163 | ax.set_ylim(tuple(self.ylims))
164 |
165 | ax.set_xlabel('x')
166 | ax.set_ylabel('y')
167 | ax.set_aspect('equal')
168 |
169 | puck = plt.Circle(tuple(self.pos), self.circ_rad, color='tomato')
170 | ax.add_artist(puck)
171 |
172 | if self.target is not None:
173 | target = plt.Circle(tuple(self.target), self.target_rad, color='seagreen')
174 | ax.add_artist(target)
175 |
176 |
177 | def plotStateParams(self,axes):
178 |
179 | ax1 = axes[0]
180 | ax2 = axes[1]
181 | ax3 = axes[2]
182 | ax4 = axes[3]
183 |
184 | ax1.clear()
185 | ax1.plot(self.pos_hist[:,0][-1000:],label='x')
186 | ax1.plot(self.pos_hist[:,1][-1000:],label='y')
187 | ax1.legend()
188 |
189 | ax2.clear()
190 | ax2.plot(self.a_hist[-1000:],label='a')
191 | ax2.set_yticks([0,1,2,3])
192 | ax2.set_yticklabels(['U','D','L','R'])
193 | ax2.legend()
194 |
195 |
196 | ax3.clear()
197 | ax3.plot(self.r_hist[-1000:],label='R')
198 | ax3.legend()
199 |
200 |
201 | ax4.clear()
202 | ax4.plot(self.v_hist[:,0][-1000:],label='vx')
203 | ax4.plot(self.v_hist[:,1][-1000:],label='vy')
204 | ax4.legend()
205 |
206 |
207 |
208 |
209 | #
210 |
--------------------------------------------------------------------------------
/classes/FileSystemTools.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from os import mkdir
3 | import os
4 | from copy import copy,deepcopy
5 | import time
6 | import glob
7 | import subprocess
8 |
9 | def getDateString():
10 | return(datetime.now().strftime('%d-%m-%Y_%H-%M-%S'))
11 |
12 |
13 | def makeDir(dir_name):
 14 |     # Even though this lives in a library dir, mkdir() creates the dir relative
 15 |     # to the working directory of the calling script.
16 | mkdir(dir_name)
17 | return(dir_name)
18 |
19 |
20 | def makeDateDir(base_dir='.'):
21 | # Just creates a dir with the current date for its name
22 | ds = getDateString()
23 | full_dir = combineDirAndFile(base_dir, ds)
24 | makeDir(full_dir)
25 | return(full_dir)
26 |
27 |
28 | def makeLabelDateDir(label, base_dir='.'):
29 | # You give it a label, and it creates the dir label_datestring
30 | dir_name = label + '_' + getDateString()
31 | full_dir = combineDirAndFile(base_dir, dir_name)
32 | makeDir(full_dir)
33 | return(full_dir)
34 |
35 |
36 | def combineDirAndFile(dir, file):
37 | # Adds the file to the end of dir, adding a slash in between if needed.
38 | return(addTrailingSlashIfNeeded(dir) + file)
39 |
40 |
41 | def dictPrettyPrint(in_dict):
42 |
43 | # Formats a dict into a nice string with each k,v entry on a new line,
44 | # and prints it.
45 | dict_str = '{\n'
46 |
47 | for k,v in in_dict.items():
48 | dict_str += '\t{} : {}\n'.format(k, v)
49 |
50 | dict_str += '\n}\n'
51 | print(dict_str)
52 |
53 |
54 |
55 |
 56 | def dictToStringList(in_dict):
 57 |     pd_copy = copy(in_dict)
58 | for k,v in pd_copy.items():
59 | if type(v).__name__ == 'float':
60 | if abs(v)>10**-4:
61 | pd_copy[k] = '{:.5f}'.format(v)
62 | else:
63 | pd_copy[k] = '{:.2E}'.format(v)
64 |
65 | params = [str(k)+'='+str(v) for k,v in pd_copy.items() if v is not None]
66 | return(params)
67 |
68 |
69 |
70 | def paramDictToFnameStr(param_dict):
71 | # Creates a string that can be used as an fname, separated by
72 | # underscores. If a param has the value None, it isn't included.
73 | params = dictToStringList(param_dict)
74 | return('_'.join(params))
75 |
76 | def paramDictToLabelStr(param_dict):
77 | # Creates a string that can be used as an fname, separated by
78 | # ', '. If a param has the value None, it isn't included.
79 | params = dictToStringList(param_dict)
80 | return(', '.join(params))
81 |
82 |
 83 | def listToFname(str_list):
 84 |     return('_'.join(str_list))
85 |
86 |
87 | def parseSingleAndListParams(param_dict, exclude_list):
88 |
89 | # This is useful for if you want to do multiple runs, varying one or
90 | # several parameters at once. exclude_list are ones you don't want to
91 | # include in the parameters in the tuple.
92 |
93 | # It returns a list of the parameters that are varied,
94 | # and a list of dictionaries that can be directly passed to a function, where
95 | # each one has a different set of the varied params.
96 | #
97 | # You should pass the args where if you don't want to vary an arg, it's just normal
98 | # my_arg = 5, but if you do want to vary it, you pass it a list of the vary values, like
99 | # my_arg = [1, 5, 8]. If you want to vary two at the same time, you pass them both as separate
100 | # lists, and it will match them up, but they need to be the same size.
101 |
102 | # list_params is just a list of the params that were passed as a list, that we'll vary.
103 | list_params = []
104 | # single_params is a dict of the params that aren't varied and will have the same vals in each
105 | # separate run.
106 | single_params = {}
107 | # ziplist is a list of the lists for the params that are varied. So if there are two varied
108 | # args, each length 3, it will take these, and then below zip them to create a list of pairs.
109 | # arg1=[1,2,3], arg2=[2,4,8] -> ziplist=[arg1,arg2] -> param_tups=[(1,2),(2,4),(3,8)]
110 | ziplist = []
111 |
112 |
113 | for k,v in param_dict.items():
114 | if type(v).__name__ == 'list':
115 | list_params.append(k)
116 | ziplist.append(v)
117 | else:
118 | if k not in exclude_list:
119 | single_params[k] = v
120 |
121 | param_tups = list(zip(*ziplist))
122 |
123 | vary_param_dicts = []
124 | vary_param_tups = []
125 | for tup in param_tups:
126 | temp_dict = dict(zip(list_params,tup))
127 | temp_kw = {**single_params, **temp_dict}
128 | vary_param_tups.append(temp_dict)
129 | vary_param_dicts.append(temp_kw)
130 |
131 | # list_params: just a list of the names of the varied ones.
132 | # vary_param_dicts: a list of the dicts that you can pass to each iteration, which includes the args that don't vary.
133 | # vary_param_tups: a list of dicts corresponding to vary_param_dicts, of only the values that change.
134 | return(list_params, vary_param_dicts, vary_param_tups)
135 |
136 |
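# Usage sketch for parseSingleAndListParams (illustrative values):
#
#     names, run_dicts, vary_tups = parseSingleAndListParams(
#         {'N_pop': 64, 'std': [0.1, 1.0]}, exclude_list=[])
#     # names     -> ['std']
#     # run_dicts -> [{'N_pop': 64, 'std': 0.1}, {'N_pop': 64, 'std': 1.0}]
#     # vary_tups -> [{'std': 0.1}, {'std': 1.0}]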
137 |
138 | def strfdelta(tdelta, fmt):
139 | d = {"days": tdelta.days}
140 | d["hours"], rem = divmod(tdelta.seconds, 3600)
141 | d["minutes"], d["seconds"] = divmod(rem, 60)
142 | return fmt.format(**d)
143 |
144 |
145 | def getCurTimeObj():
146 | return(datetime.now())
147 |
148 |
149 | def getTimeDiffNum(start_time_obj):
150 |
151 | diff = datetime.timestamp(datetime.now()) - datetime.timestamp(start_time_obj)
152 | return(diff)
153 |
154 |
155 | def getTimeDiffObj(start_time_obj):
156 | #Gets the time diff in a nice format from the start_time_obj.
157 | diff = datetime.now() - start_time_obj
158 | return(diff)
159 |
160 |
161 | def getTimeDiffStr(start_time_obj):
162 | #Gets the time diff in a nice format from the start_time_obj.
163 | diff = getTimeDiffObj(start_time_obj)
164 |
165 | return(strfdelta(diff,'{hours} hrs, {minutes} mins, {seconds} s'))
166 |
167 |
168 | def writeDictToFile(in_dict, fname):
169 | # You have to copy it here, otherwise it'll actually overwrite the values in the dict
170 | # you passed.
171 |     my_dict = copy(in_dict)
172 | f = open(fname,'w+')
173 | for k,v in my_dict.items():
174 | if type(v).__name__ == 'float':
175 | if abs(v)>10**-4:
176 | my_dict[k] = '{:.5f}'.format(v)
177 | else:
178 | my_dict[k] = '{:.2E}'.format(v)
179 | f.write('{} = {}\n'.format(k, my_dict[k]))
180 |
181 | f.close()
182 |
183 |
184 | def readFileToDict(fname):
185 | d = {}
186 | with open(fname) as f:
187 | for line in f:
188 | (key, val) = line.split(' = ')
189 | val = val.strip('\n')
190 | #This is to handle the fact that everything gets read in
191 | #as a string, but some stuff you probably want to be floats.
192 | try:
193 | val = float(val)
194 |             except ValueError:
195 | val = str(val)
196 |
197 | d[key] = val
198 |
199 |
200 | return(d)
201 |
202 |
203 | def dirFromFullPath(fname):
204 | # This gives you the path, stripping the local filename, if you pass it
205 | # a long path + filename.
206 | parts = fname.split('/')
207 | last_part = parts[-1]
208 | path = fname.replace(last_part,'')
209 | if path == '':
210 | return('./')
211 | else:
212 | return(path)
213 |
214 |
215 | def fnameFromFullPath(fname):
216 | # This just gets the local filename if you passed it some huge long name with the path.
217 | parts = fname.split('/')
218 | last_part = parts[-1]
219 | return(last_part)
220 |
221 | def stripAnyTrailingSlash(path):
222 | if path[-1] == '/':
223 | return(path[:-1])
224 | else:
225 | return(path)
226 |
227 |
228 | def addTrailingSlashIfNeeded(path):
229 | if path[-1] == '/':
230 | return(path)
231 | else:
232 | return(path + '/')
233 |
234 |
235 |
236 |
237 |
238 | def gifFromImages(imgs_path, gif_name, ext = '.png', delay=50):
239 |
240 |
241 | imgs_path = stripAnyTrailingSlash(imgs_path)
242 | file_list = glob.glob(imgs_path + '/' + '*' + ext) # Get all the pngs in the current directory
243 | #print(file_list)
244 | #print([fnameFromFullPath(x).split('.png')[0] for x in file_list])
245 | #list.sort(file_list, key=lambda x: int(x.split('_')[1].split('.png')[0]))
246 | list.sort(file_list, key=lambda x: int(fnameFromFullPath(x).split(ext)[0]))
247 | #list.sort(file_list) # Sort the images by #, this may need to be tweaked for your use case
248 | #print(file_list)
249 | assert len(file_list) < 300, 'Too many files ({}), will probably crash convert command.'.format(len(file_list))
250 |
251 | output_fname = '{}/{}.gif'.format(imgs_path, gif_name)
252 |
253 | check_call_arglist = ['convert'] + ['-delay', str(delay)] + file_list + [output_fname]
254 | #print(check_call_arglist)
255 | print('Calling convert command to create gif...')
256 | subprocess.check_call(check_call_arglist)
257 | print('done.')
258 | return(output_fname)
259 | # older method:
260 |
261 | '''with open('image_list.txt', 'w') as file:
262 | for item in file_list:
263 | file.write("%s\n" % item)
264 |
265 | os.system('convert @image_list.txt {}/{}.gif'.format(imgs_path,gif_name)) # On windows convert is 'magick'
266 | '''
267 |
268 | #
269 |
--------------------------------------------------------------------------------
/scrap_old_test/agent1.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from math import sqrt
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 | from copy import deepcopy
10 |
11 |
12 | class DQN(nn.Module):
13 |
14 | def __init__(self, D_in, H, D_out, NL_fn=torch.tanh, softmax=False):
15 | super(DQN, self).__init__()
16 |
17 | self.lin1 = nn.Linear(D_in,H)
18 | self.lin2 = nn.Linear(H,D_out)
19 | self.NL_fn = NL_fn
20 | self.softmax = softmax
21 |
22 | def forward(self, x):
23 | x = self.lin1(x)
24 | #x = F.relu(x)
25 | #x = torch.tanh(x)
26 | x = self.NL_fn(x)
27 | x = self.lin2(x)
28 | if self.softmax:
29 | x = torch.softmax(x,dim=1)
30 | return(x)
31 |
32 |
33 |
34 | class agent1:
35 |
36 |
37 | def __init__(self, **kwargs):
38 |
39 | self.xlims = kwargs.get('xlims', np.array([-0.5,0.5]))
40 | self.ylims = kwargs.get('ylims', np.array([-0.5,0.5]))
41 | self.lims = np.array((self.xlims,self.ylims))
42 | self.max_dist = sqrt(np.ptp(self.xlims)**2 + np.ptp(self.ylims)**2)
43 | self.a = kwargs.get('a',1.0)
44 | self.drag = kwargs.get('drag', 0.5)
45 | self.time_step = kwargs.get('dt',10**-1)
46 | self.reward_type = kwargs.get('reward','sparse')
47 |
48 | self.passed_params = {}
49 | check_params = ['a', 'drag', 'dt', 'reward']
50 | for param in check_params:
51 | if kwargs.get(param, None) is not None:
52 | self.passed_params[param] = kwargs.get(param, None)
53 |
54 | self.N_actions = 4
55 |
56 |
57 | self.circ_rad = np.ptp(self.xlims)/20.0
58 | self.target_rad = 1*self.circ_rad
59 | self.resetTarget()
60 |
61 | self.pos0 = np.array([self.xlims.mean()/2.0,self.ylims.mean()/2.0])
62 | self.v0 = np.array([0.0,0.0])
63 | self.resetStateValues()
64 | self.accel_array = np.array([[0,1],[0,-1],[-1,0],[1,0]])
65 |
66 |
67 |
68 | self.N_state_terms = len(self.getStateVec())
69 |
70 | self.HLN = 20
71 |
72 | self.dtype = torch.float32
73 | torch.set_default_dtype(self.dtype)
74 |
 75 |         # nn.Linear already initializes the weights randomly (Kaiming-uniform by default, not a unit gaussian).
76 | self.policy_NN = DQN(self.N_state_terms, self.HLN, self.N_actions, softmax=True)
77 |
78 | '''for p in self.policy_NN.parameters():
79 | print(p.data)'''
80 | self.N_weight_tensors = len(list(self.policy_NN.parameters()))
81 |
82 |
83 | self.N_mate_swaps = 18
84 | self.N_mutations = 2
85 |
86 |
87 |
88 | def puckTargetDist(self):
89 | return(sqrt(np.sum((self.pos-self.target)**2)))
90 |
91 |
92 | def addToHist(self):
93 | self.pos_hist = np.concatenate((self.pos_hist,[self.pos]))
94 | self.v_hist = np.concatenate((self.v_hist,[self.v]))
95 | self.t.append(self.t[-1] + self.time_step)
96 | self.r_hist.append(self.reward())
97 |
98 |
99 | def resetTarget(self):
100 |
101 | self.target = self.target_rad + self.lims[:,0] + np.random.random((2,))*(np.ptp(self.lims,axis=1)-2*self.target_rad)
102 |
103 |
104 | def iterateEuler(self,action):
105 |
106 |         # This uses the Euler-Cromer method to integrate the motion.
107 | 
108 |         # Right now it just gets clamped at the wall if it reaches the
109 |         # boundary, but it might be cool to make periodic boundary conditions,
110 |         # to see if it would learn to zoom around them.
111 |
112 | a = self.actionToAccel(action) - self.drag*self.v
113 |
114 | v_next = self.v + a*self.time_step
115 | pos_next = self.pos + v_next*self.time_step
116 |
117 | #To handle the walls
118 | for i in [0,1]:
119 | if pos_next[i] < (self.lims[i,0] + self.circ_rad):
120 | pos_next[i] = self.lims[i,0] + self.circ_rad
121 | # This makes it "bounce" off the wall, so it keeps momentum.
122 | v_next[i] = -v_next[i]
123 | # This makes it "stick" to the wall.
124 | #v_next[i] = 0
125 |
126 | if pos_next[i] > (self.lims[i,1] - self.circ_rad):
127 | pos_next[i] = self.lims[i,1] - self.circ_rad
128 | v_next[i] = -v_next[i]
129 | #v_next[i] = 0
130 |
131 | self.pos = pos_next
132 | self.v = v_next
133 | self.addToHist()
134 |
135 |
136 | def actionToAccel(self,action):
137 | self.a_hist.append(action)
138 | return(self.a*self.accel_array[action])
139 |
140 |
141 | def softmaxAction(self, state_vec):
142 | pi_vals = self.policy_NN(state_vec)
143 | m = Categorical(pi_vals)
144 | return(m.sample())
145 |
146 | ###################### Required agent functions
147 |
148 |
149 | def mate(self, other_agent):
150 |
151 | ag1 = deepcopy(self)
152 | ag2 = deepcopy(other_agent)
153 |
154 | lin1_weight_shape = ag1.policy_NN.lin1.weight.data.shape
155 | lin1_bias_shape = ag1.policy_NN.lin1.bias.data.shape
156 | lin2_weight_shape = ag1.policy_NN.lin2.weight.data.shape
157 | lin2_bias_shape = ag1.policy_NN.lin2.bias.data.shape
158 |
159 |
160 | for i in range(self.N_mate_swaps):
161 |
162 | r1 = np.random.randint(0, lin1_weight_shape[0])
163 | r2 = np.random.randint(0, lin1_weight_shape[1])
164 | ag1.policy_NN.lin1.weight.data[r1,r2], ag2.policy_NN.lin1.weight.data[r1,r2] = ag2.policy_NN.lin1.weight.data[r1,r2], ag1.policy_NN.lin1.weight.data[r1,r2]
165 |
166 | r1 = np.random.randint(0, lin2_weight_shape[0])
167 | r2 = np.random.randint(0, lin2_weight_shape[1])
168 | ag1.policy_NN.lin2.weight.data[r1,r2], ag2.policy_NN.lin2.weight.data[r1,r2] = ag2.policy_NN.lin2.weight.data[r1,r2], ag1.policy_NN.lin2.weight.data[r1,r2]
169 |
170 | r1 = np.random.randint(0, lin1_weight_shape[0])
171 | ag1.policy_NN.lin1.bias.data[r1], ag2.policy_NN.lin1.bias.data[r1] = ag2.policy_NN.lin1.bias.data[r1], ag1.policy_NN.lin1.bias.data[r1]
172 |
173 | r1 = np.random.randint(0, lin2_weight_shape[0])
174 | ag1.policy_NN.lin2.bias.data[r1], ag2.policy_NN.lin2.bias.data[r1] = ag2.policy_NN.lin2.bias.data[r1], ag1.policy_NN.lin2.bias.data[r1]
175 |
176 | return(ag1, ag2)
177 |
178 |
179 | def isSameState(self, other_agent):
180 | return(False)
181 |
182 |
183 | def mutate(self):
184 |
185 | lin1_weight_shape = self.policy_NN.lin1.weight.data.shape
186 | lin1_bias_shape = self.policy_NN.lin1.bias.data.shape
187 | lin2_weight_shape = self.policy_NN.lin2.weight.data.shape
188 | lin2_bias_shape = self.policy_NN.lin2.bias.data.shape
189 |
190 | for i in range(self.N_mutations):
191 |
192 | r1 = np.random.randint(0, lin1_weight_shape[0])
193 | r2 = np.random.randint(0, lin1_weight_shape[1])
194 | self.policy_NN.lin1.weight.data[r1,r2] = np.random.randn()
195 |
196 | r1 = np.random.randint(0, lin2_weight_shape[0])
197 | r2 = np.random.randint(0, lin2_weight_shape[1])
198 | self.policy_NN.lin2.weight.data[r1,r2] = np.random.randn()
199 |
200 | r1 = np.random.randint(0, lin1_weight_shape[0])
201 | self.policy_NN.lin1.bias.data[r1] = np.random.randn()
202 |
203 | r1 = np.random.randint(0, lin2_weight_shape[0])
204 | self.policy_NN.lin2.bias.data[r1] = np.random.randn()
205 |
206 |
207 |
208 | def fitnessFunction(self):
209 | self.fixedLengthEpisode(100)
210 | # I think the fitness function is meant to be minimized, so we should pass it
211 | # the negative of the total reward.
212 | return(-sum(self.r_hist))
213 |
214 |
215 |
216 | def fixedLengthEpisode(self, N_steps):
217 | self.resetTarget()
218 | self.resetStateValues()
219 |
220 | for i in range(N_steps):
221 | s = torch.tensor(self.getStateVec(), dtype=torch.float32).unsqueeze(dim=0)
222 | a = self.softmaxAction(s)
223 | r, s_next = self.iterate(a)
224 |
225 |
226 | def getPassedParams(self):
227 | #This returns a dict of params that were passed to the agent, that apply to the agent.
228 | #So if you pass it a param for 'reward', it will return that, but it won't return the
229 | #default val if you didn't pass it.
230 | return(self.passed_params)
231 |
232 |
233 | def getStateVec(self):
234 | assert self.target is not None, 'Need target to get state vec'
235 | return(np.concatenate((self.pos,self.v,self.target)))
236 |
237 |
238 | def getState(self):
239 | return(self.getStateVec())
240 |
241 | def printState(self):
242 | print(self.getState())
243 |
244 | def reward(self):
245 |
246 | assert self.target is not None, 'Need a target'
247 |
248 | max_R = 1
249 |
250 | if self.reward_type == 'sparse':
251 | if self.puckTargetDist() <= (self.target_rad + self.circ_rad):
252 | return(max_R)
253 | else:
254 | return(-0.01)
255 |
256 | if self.reward_type == 'shaped':
257 | #return(max_R*(self.max_dist/2.0 - self.puckTargetDist()))
258 | #These numbers will probably have to change if a, dt, or the dimensions change.
259 | return(-0.5*self.puckTargetDist() + 0.4)
260 |
261 |
262 | def initEpisode(self):
263 | self.resetStateValues()
264 | self.resetTarget()
265 |
266 |
267 | def iterate(self,action):
268 | self.iterateEuler(action)
269 |
270 | r = self.reward()
271 | if r > 0:
272 | self.resetTarget()
273 |
274 | return(r,self.getStateVec())
275 |
276 |
277 | def resetStateValues(self):
278 |
279 | self.pos = self.pos0
280 | self.v = self.v0
281 |
282 | self.pos_hist = np.array([self.pos])
283 | self.v_hist = np.array([self.v])
284 | self.t = [0]
285 | self.a_hist = [0]
286 | self.r_hist = []
287 |
288 |
289 | def drawState(self, ax):
290 |
291 | ax.clear()
292 | ax.set_xlim(tuple(self.xlims))
293 | ax.set_ylim(tuple(self.ylims))
294 |
295 | ax.set_xlabel('x')
296 | ax.set_ylabel('y')
297 | ax.set_aspect('equal')
298 |
299 | puck = plt.Circle(tuple(self.pos), self.circ_rad, color='tomato')
300 | ax.add_artist(puck)
301 |
302 | if self.target is not None:
303 | target = plt.Circle(tuple(self.target), self.target_rad, color='seagreen')
304 | ax.add_artist(target)
305 |
306 |
307 | def plotStateParams(self,axes):
308 |
309 | ax1 = axes[0]
310 | ax2 = axes[1]
311 | ax3 = axes[2]
312 | ax4 = axes[3]
313 |
314 | ax1.clear()
315 | ax1.plot(self.pos_hist[:,0][-1000:],label='x')
316 | ax1.plot(self.pos_hist[:,1][-1000:],label='y')
317 | ax1.legend()
318 |
319 | ax2.clear()
320 | ax2.plot(self.a_hist[-1000:],label='a')
321 | ax2.set_yticks([0,1,2,3])
322 | ax2.set_yticklabels(['U','D','L','R'])
323 | ax2.legend()
324 |
325 |
326 | ax3.clear()
327 | ax3.plot(self.r_hist[-1000:],label='R')
328 | ax3.legend()
329 |
330 |
331 | ax4.clear()
332 | ax4.plot(self.v_hist[:,0][-1000:],label='vx')
333 | ax4.plot(self.v_hist[:,1][-1000:],label='vy')
334 | ax4.legend()
335 |
336 |
337 |
338 |
339 | #
340 |
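A hedged sketch of how the "required agent functions" above fit together; this driver is illustrative, not code from the repo.

```python
# Hypothetical driver: crossover, mutation, and fitness evaluation.
parent_a, parent_b = agent1(), agent1()
child_a, child_b = parent_a.mate(parent_b)  # swap random weights/biases between clones
child_a.mutate()                            # overwrite a few random entries with randn
fitness = child_a.fitnessFunction()         # runs a 100-step episode; lower is better here
```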
--------------------------------------------------------------------------------
/classes/PopTests.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | import Population
4 | import matplotlib.pyplot as plt
5 | from statistics import mean,stdev
6 | import FileSystemTools as fst
7 | from time import time
8 | import numpy as np
9 | import os
10 | import glob
11 | from math import sqrt, ceil
12 | import subprocess
13 |
14 |
15 |
16 | def varyParam(**kwargs):
17 |
18 | st = fst.getCurTimeObj()
19 |
20 | date_time = fst.getDateString()
21 | notes = kwargs.get('notes', '')
22 | N_runs = kwargs.get('N_runs', 1)
23 | show_plot = kwargs.get('show_plot', False)
24 |
25 | exclude_list = ['notes', 'N_runs', 'show_plot']
26 | vary_params, vary_param_dict_list, vary_param_tups = fst.parseSingleAndListParams(kwargs,exclude_list)
27 |
28 | label = 'vary_' + fst.listToFname(vary_params) + '_' + notes
29 | dir = fst.makeLabelDateDir(label)
30 | print('Saving vary param results to: ', dir)
31 | img_ext = '.png'
32 | base_fname = fst.combineDirAndFile(dir, label + date_time)
33 | img_fname = base_fname + img_ext
34 |
35 | log_fname = base_fname + '_log.txt'
36 | fst.writeDictToFile(kwargs, log_fname)
37 |
38 | # Set the SD for each entry to 0. If there's only 1 run each, that's fine. If
39 | # there are several, it will replace the 0's.
40 | R_tots = []
41 | SD = [0]*len(vary_param_dict_list)
42 |
43 | for i, kws in enumerate(vary_param_dict_list):
44 |
45 | print('\n{}\n'.format(vary_param_tups[i]))
46 | results = []
47 | for j in range(N_runs):
48 | print('run ',j)
49 |
50 | p1 = Population.Population(**kws, dir=dir, fname_notes=fst.paramDictToFnameStr(vary_param_tups[i]))
51 |
 52 |             r_tot = p1.evolve(**kws)['best_individ_avg_score'] # evolve() returns a dict, not a tuple
53 |
54 | results.append(r_tot)
55 |
56 | R_tots.append(mean(results))
57 | if N_runs>1:
58 | SD[i] = stdev(results)
59 |
60 |
61 | plt.close('all')
62 | fig,axes = plt.subplots(1,1,figsize=(6,9))
63 |
64 | plt.errorbar(list(range(len(R_tots))), R_tots, yerr=SD, fmt='ro-')
65 |
66 | axes.set_xticks(list(range(len(R_tots))))
67 | x_tick_labels = ['\n'.join(fst.dictToStringList(param)) for param in vary_param_tups]
68 | axes.set_xticklabels(x_tick_labels, rotation='vertical')
69 | axes.set_ylabel('Total reward')
70 | plt.tight_layout()
71 | plt.savefig(img_fname)
72 |
73 | vary_param_labels = [','.join(fst.dictToStringList(param)) for param in vary_param_tups]
74 | f = open(base_fname + '_values.txt','w+')
75 | for label, val, sd in zip(vary_param_labels, R_tots, SD):
76 | f.write('{}\t{}\t{}\n'.format(label, val, sd))
77 | f.close()
78 |
79 | print('\n\ntook {} to execute'.format(fst.getTimeDiffStr(st)))
80 |
81 | plotRewardCurvesByVaryParam(dir, searchlabel='bestscore')
82 | plotRewardCurvesByVaryParam(dir, searchlabel='meanscore')
83 |
84 | if show_plot:
85 | plt.show()
86 |
87 |
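varyParam() treats any list-valued kwarg as the swept parameter (via fst.parseSingleAndListParams) and averages N_runs runs per setting. A hedged usage sketch; the agent class and the values below are assumptions.

```python
# Hypothetical sweep over population size.
from CartPoleAgent import CartPoleAgent  # assumed importable agent class

varyParam(
    agent_class=CartPoleAgent,
    N_pop=[10, 20, 40],   # list-valued kwarg -> this is the parameter being swept
    N_gen=50,
    N_runs=3,
    notes='pop_size_sweep',
)
```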
88 |
89 |
90 | def plotRewardCurvesByVaryParam(dir, searchlabel, **kwargs):
91 |
 92 |     # Use the "values" file from now on to get the vary_param values.
 93 |     # searchlabel is the prefix of the filenames we're searching for,
 94 |     # so the same function works for multiple quantities (e.g. bestscore, meanscore).
95 |
96 | # Find the values file
97 | val_file_list = glob.glob(fst.addTrailingSlashIfNeeded(dir) + 'vary_' + '*' + 'values.txt')
98 |
99 | assert len(val_file_list)==1, 'there needs to be exactly one values.txt file.'
100 |
101 | vals_file = val_file_list[0]
102 |
103 | # Read in each line, corresponding to each vary params tuple
104 | with open(vals_file, 'r') as f:
105 | vary_param_vals = f.read().split('\n')
106 |
107 | # they're tab sep'd, so split and grab the first of each col.
108 | vary_param_vals = [x.split('\t')[0] for x in vary_param_vals if x!='']
109 | # Expects the vary params to be separated by underscores -- not ideal.
110 | vary_param_vals = [x.replace(',','_') for x in vary_param_vals]
111 | # Get the files that contain this series of vary vals...
112 | vary_param_files = [glob.glob(fst.addTrailingSlashIfNeeded(dir) + searchlabel + '*' + val + '*' + '.txt') for val in vary_param_vals]
113 |
114 |
115 |
116 | fig, ax = plt.subplots(1, 1, figsize=(10,8))
117 |
118 | line_cols = ['darkred', 'mediumblue', 'darkgreen', 'goldenrod', 'purple', 'darkorange', 'black']
119 | shade_cols = ['tomato', 'dodgerblue', 'lightgreen', 'khaki', 'plum', 'peachpuff', 'lightgray']
120 | max_total = -1000
121 | min_total = 1000
122 | N_stds = 2
123 | N_skip = 0
124 |
125 |     # This is a hacky way of lining up curves that are shifted: you pass a list of
126 |     # scale factors and a list of offsets, one per curve (each avg is plotted as avg*scale + offset).
127 |     # If you don't pass anything, the curves are plotted unchanged. See the usage sketch after this function.
128 | scale_factors = kwargs.get('scale_factors', np.ones(len(vary_param_vals)))
129 | offsets = kwargs.get('offsets', np.zeros(len(vary_param_vals)))
130 |
131 | # For doing the scale and offset thing, and making sure the ranges are right.
132 | print('vary_param_vals', vary_param_vals)
133 | for i, (val, file_group) in enumerate(zip(vary_param_vals, vary_param_files)):
134 | dat_array = np.array([np.loadtxt(fname) for fname in file_group])
135 | avg = np.mean(dat_array, axis=0)*scale_factors[i] + offsets[i]
136 | std = np.std(dat_array, axis=0)*scale_factors[i]
137 | if max((avg + N_stds*std)[N_skip:]) > max_total:
138 | max_total = max((avg + N_stds*std)[N_skip:])
139 | if min((avg - N_stds*std)[N_skip:]) < min_total:
140 | min_total = min((avg - N_stds*std)[N_skip:])
141 | plt.plot(avg, color=line_cols[i], label=val)
142 | plt.fill_between(np.array(range(len(avg))), avg - std, avg + std, facecolor=shade_cols[i], alpha=0.5)
143 |
144 | #print(max_total, min_total)
145 | plt.legend()
146 | plt.xlabel('generations')
147 | plt.ylabel(searchlabel)
148 | plt.ylim((min_total,max_total))
149 |
150 | plt.savefig(fst.addTrailingSlashIfNeeded(dir) + 'all_' + searchlabel + '__'.join(vary_param_vals) + '__' + fst.getDateString() + '.png')
151 |
152 | # For each one separately
153 | print('vary_param_vals', vary_param_vals)
154 | for i, (val, file_group) in enumerate(zip(vary_param_vals, vary_param_files)):
155 | #print(max_total, min_total)
156 | plt.clf()
157 | dat_array = np.array([np.loadtxt(fname) for fname in file_group])
158 | avg = np.mean(dat_array, axis=0)*scale_factors[i] + offsets[i]
159 | std = np.std(dat_array, axis=0)*scale_factors[i]
160 |
161 | plt.plot(avg, color=line_cols[i], label=val)
162 | plt.legend()
163 | plt.xlabel('generations')
164 | plt.ylabel(searchlabel)
165 | plt.ylim((min_total,max_total))
166 | plt.fill_between(np.array(range(len(avg))), avg - std, avg + std, facecolor=shade_cols[i], alpha=0.5)
167 | plt.savefig(fst.addTrailingSlashIfNeeded(dir) + searchlabel + '__' + val + '__' + fst.getDateString() + '.png')
168 |
169 |
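A hedged sketch of the scale/offset hook described above; the run directory is a placeholder. Each averaged curve is plotted as avg*scale + offset.

```python
# Hypothetical call: halve the second curve and shift it up by 10 before plotting.
plotRewardCurvesByVaryParam(
    'misc_runs/vary_N_pop_example',  # placeholder run directory
    searchlabel='bestscore',
    scale_factors=[1.0, 0.5],
    offsets=[0.0, 10.0],
)
```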
170 |
171 | def plotPopulationProperty(dir, search_label, **kwargs):
172 |
173 |
174 | show_plot = kwargs.get('show_plot', False)
175 | save_plot = kwargs.get('save_plot', True)
176 | make_hist_gif = kwargs.get('make_hist_gif', True)
177 |
178 | # Find the values file
179 | prop_file_list = glob.glob(fst.addTrailingSlashIfNeeded(dir) + search_label + '*' + '.txt')
180 |
181 | assert len(prop_file_list)==1, 'there needs to be exactly one .txt file.'
182 |
183 | prop_file = prop_file_list[0]
184 |
185 | prop_dat = np.loadtxt(prop_file)
186 |
187 | N_gen = prop_dat.shape[0]
188 | max_gif_frames = 299
189 | gif_save_period = ceil(N_gen/max_gif_frames)
190 |
191 | avg = np.mean(prop_dat)
192 | std = np.std(prop_dat)
193 |
194 | print('dat min: {:.2f}'.format(np.min(prop_dat)))
195 | print('dat max: {:.2f}'.format(np.max(prop_dat)))
196 | print('dat mean: {:.2f}'.format(avg))
197 | print('dat std: {:.2f}'.format(std))
198 |
199 | dat_lb = max(np.min(prop_dat), avg - 2*std)
200 | dat_ub = min(np.max(prop_dat), avg + 2*std)
201 |
202 | dat_lims = (dat_lb - 0.1*abs(dat_lb), dat_ub + 0.1*abs(dat_ub))
203 |
204 | # Make histogram gif
205 |
206 | if make_hist_gif:
207 |
208 | gif_dir = fst.makeDir(fst.combineDirAndFile(dir, 'gif_imgs'))
209 |
210 | for i, gen_dat in enumerate(prop_dat):
211 | plt.clf()
212 | plt.hist(gen_dat, facecolor='dodgerblue', edgecolor='k', label=search_label, alpha=0.9, density=True)
213 | plt.axvline(np.mean(gen_dat), color='k', linestyle='dashed', linewidth=1)
214 | plt.xlim(dat_lims)
215 | plt.ylim((0, 1.0/len(gen_dat)))
216 | plt.title(f'generation {i}')
217 | plt.xlabel(search_label)
218 | plt.ylabel('counts')
219 |
220 | if save_plot:
221 | if i%gif_save_period == 0:
222 | fname = fst.combineDirAndFile(gif_dir, f'{i}.png')
223 | plt.savefig(fname)
224 |
225 |
226 | try:
227 | gif_name = fst.gifFromImages(gif_dir, f'{search_label}_hist', ext='.png', delay=5)
228 | print(gif_name)
229 | gif_basename = fst.fnameFromFullPath(gif_name)
230 | subprocess.check_call(['mv', gif_name, fst.combineDirAndFile(dir, gif_basename)])
231 | subprocess.check_call(['rm', '-rf', gif_dir])
232 |         except Exception as e:
233 |             print('problem in creating gif:', e)
234 |
235 |
236 | plt.clf()
237 |
238 | # Make time scatter plot
239 |
240 |     # Right now each row is a generation and each entry of that row is an individual.
241 |     # We want to plot it per generation, so we turn it into a set of (generation, value)
242 |     # points: 5 gens of 8 individuals would go 5x8 -> 5x8x2 -> 40x2 -> 2x40.
243 |     # (A standalone sketch of this transform follows the function.)
244 |     prop_pts = np.array([[[i, val] for val in prop_dat[i]] for i in range(len(prop_dat))]) # Makes it 5x8x2
245 |     N_tot_entries = prop_dat.shape[0]*prop_dat.shape[1] # 5*8 = 40 total points
246 |     prop_pts = np.reshape(prop_pts, (N_tot_entries, 2)) # Makes it 40x2
247 |     prop_pts = np.swapaxes(prop_pts, 0, 1) # Makes it 2x40
248 |
249 | plt.plot(prop_pts[0], prop_pts[1], 'o', color='dodgerblue')
250 | plt.xlabel('generations')
251 | plt.ylabel(search_label)
252 | plt.ylim(dat_lims)
253 |
254 | if save_plot:
255 | plt.savefig(fst.combineDirAndFile(dir, search_label + '_scatter_plot.png'))
256 |
257 | if show_plot:
258 | plt.show()
259 |
260 | plt.clf()
261 |
262 | # Make time std plot
263 |
264 | gen_mean = np.mean(prop_dat, axis=1)
265 | gen_std = np.std(prop_dat, axis=1)
266 |
267 | line_cols = ['darkred', 'mediumblue', 'darkgreen', 'goldenrod', 'purple', 'darkorange', 'black']
268 | shade_cols = ['tomato', 'dodgerblue', 'lightgreen', 'khaki', 'plum', 'peachpuff', 'lightgray']
269 |
270 | plt.fill_between(np.array(range(len(gen_mean))), gen_mean - gen_std, gen_mean + gen_std, facecolor=shade_cols[0], alpha=0.5)
271 | plt.plot(gen_mean, color=line_cols[0])
272 |
273 | plt.xlabel('generations')
274 | plt.ylabel(search_label)
275 | plt.ylim(dat_lims)
276 |
277 | if save_plot:
278 | plt.savefig(fst.combineDirAndFile(dir, search_label + '_mean-std_plot.png'))
279 |
280 | if show_plot:
281 | plt.show()
282 |
283 | plt.close()
284 |
285 |
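A standalone sketch of the (generation, value) transform used in the scatter plot above, with 5 generations of 8 individuals:

```python
import numpy as np

prop_dat = np.arange(40).reshape(5, 8)  # 5 generations x 8 individuals
pts = np.array([[[i, v] for v in row] for i, row in enumerate(prop_dat)])  # (5, 8, 2)
pts = pts.reshape(-1, 2)      # (40, 2): one (generation, value) pair per row
pts = np.swapaxes(pts, 0, 1)  # (2, 40): row 0 = generation, row 1 = value
assert pts.shape == (2, 40)
```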
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 | #
302 |
--------------------------------------------------------------------------------
/classes/Population.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./classes')
3 | from EPANN import EPANN
4 | from copy import deepcopy
5 | import matplotlib.pyplot as plt
6 | import FileSystemTools as fst
7 | import RunTools as rt
8 | import numpy as np
9 | import subprocess
10 | from math import sqrt, floor
11 | import movie_combine
12 | import traceback as tb
13 |
14 | class Population:
15 |
16 |
17 | def __init__(self, **kwargs):
18 |
19 | self.agent_class = kwargs.get('agent_class', None)
20 | assert self.agent_class is not None, 'Need to provide an agent class! exiting'
21 |
22 | self.init_kwargs = kwargs
23 |
24 | self.N_pop = kwargs.get('N_pop', 15)
25 | self.mut_type = kwargs.get('mut_type', 'change_topo')
26 | self.gauss_std = kwargs.get('std', 0.2)
27 | self.best_N_frac = kwargs.get('best_N_frac', 1/5.0)
28 |
29 | self.fname_notes = '{}_{}{}'.format(kwargs.get('fname_notes', ''), self.agent_class.__name__, kwargs.get('env_name', ''))
30 | self.datetime_str = fst.getDateString()
31 | self.base_dir = kwargs.get('base_dir', 'misc_runs')
32 | self.dir = fst.combineDirAndFile(self.base_dir, 'evolve_{}_{}'.format(self.datetime_str, self.fname_notes))
33 | fst.makeDir(self.dir)
34 | self.plot_dir = fst.makeDir(fst.combineDirAndFile(self.dir, 'plots'))
35 | print('run dir: ', self.dir)
36 | pop_kwargs = {'run_dir' : self.dir}
37 | both_kwargs = {**kwargs, **pop_kwargs}
38 |
39 | self.population = [EPANN(**both_kwargs) for i in range(self.N_pop)]
40 |
41 |
42 |
43 | def evolve(self, **kwargs):
44 |
45 |
46 | start_time = fst.getCurTimeObj()
47 |
48 | N_trials_per_agent = kwargs.get('N_trials_per_agent', 3)
49 | N_episode_steps = kwargs.get('N_episode_steps', 400)
50 | N_gen = kwargs.get('N_gen', 50)
 51 | 
52 | N_runs_each_champion = kwargs.get('N_runs_each_champion', 5)
53 | N_runs_with_best = kwargs.get('N_runs_with_best', 10)
54 | assert N_runs_with_best > 0, 'Need at least one run with best individ!'
55 | record_final_runs = kwargs.get('record_final_runs', False)
56 | show_final_runs = kwargs.get('show_final_runs', False)
57 |
58 | # Create a log file for the kwargs
59 | log_fname = fst.combineDirAndFile(self.dir, f'log_{self.datetime_str}.txt')
60 | fst.writeDictToFile({**self.init_kwargs, **kwargs}, log_fname)
61 |
62 |
63 | best_FFs = []
64 | mean_FFs = []
65 |
66 | all_FFs = []
67 | all_nodecounts = []
68 | all_weightcounts = []
69 | champion_FF_mean_std = [] # A list of pairs of [mean, std] for runs of the current champion.
70 |
71 |
72 | #try:
73 | for i in range(N_gen):
74 |
75 | best_FF = -100000000
76 | mean_FF = 0
77 |
78 | mean_Rs = []
79 | for j, individ in enumerate(self.population):
80 | mean_episode_score = 0
81 | for run in range(N_trials_per_agent):
82 | mean_episode_score += individ.runEpisode(N_episode_steps)
83 |
84 | mean_episode_score = mean_episode_score/N_trials_per_agent
85 | mean_Rs.append([j, mean_episode_score])
86 | mean_FF += mean_episode_score
87 | if mean_episode_score > best_FF:
88 | best_FF = mean_episode_score
89 |
90 | mean_FF = mean_FF/self.N_pop
91 | best_FFs.append(best_FF)
92 | mean_FFs.append(mean_FF)
93 |
94 | mean_Rs_no_label = [x[1] for x in mean_Rs]
95 |
96 | # Run the champion for several times to get stats
97 | champion_ind = self.sortByFitnessFunction(mean_Rs)[0][0]
98 | champion_scores = []
99 | for run in range(N_runs_each_champion):
100 | champion_scores.append(self.population[champion_ind].runEpisode(N_episode_steps))
101 | champion_FF_mean_std.append([np.mean(champion_scores), np.std(champion_scores)])
102 |
103 |
104 | # Update with progress
105 | if i%max(1, int(N_gen/20))==0:
106 |                 print('\ngen {}. Best FF = {:.4f}, mean FF = {:.4f}'.format(i, best_FF, mean_FF))
107 | self.plotPopHist(mean_Rs_no_label, 'pop_FF')
108 | if self.mut_type == 'change_topo':
109 | self.plotPopHist([len(epann.node_list) for epann in self.population], 'pop_nodecount')
110 | self.plotPopHist([len(epann.weights_list) for epann in self.population], 'pop_weightcount')
111 | fname = fst.combineDirAndFile(self.plot_dir, '{}_gen{}_{}.png'.format('bestNN', i, fst.getDateString()))
112 | self.population[champion_ind].plotNetwork(show_plot=False, save_plot=True, fname=fname, node_legend=True)
113 |
114 | #print('network sizes: ', [len(x.node_list) for x in self.population])
115 | print('avg network size: {:.3f}'.format(sum([len(x.node_list) for x in self.population])/self.N_pop))
116 | #print('# network connections: ', [len(x.weights_list) for x in self.population])
117 | print('avg # network connections: {:.3f}'.format(sum([len(x.weights_list) for x in self.population])/self.N_pop))
118 |
119 | all_FFs.append(mean_Rs_no_label)
120 | all_nodecounts.append([len(epann.node_list) for epann in self.population])
121 | all_weightcounts.append([len(epann.weights_list) for epann in self.population])
122 |
123 | # Get the next gen by mutating
124 | self.getNextGen(mean_Rs)
125 |
126 |
127 | print('\n\nRun took: ', fst.getTimeDiffStr(start_time), '\n\n')
128 |
129 | self.saveScore(best_FFs, 'bestscore')
130 | self.saveScore(mean_FFs, 'meanscore')
131 | self.saveScore(all_FFs, 'all_FFs')
132 | self.saveScore(all_nodecounts, 'nodecounts')
133 | self.saveScore(all_weightcounts, 'weightcounts')
134 | self.saveScore(champion_FF_mean_std, 'champion_FF_mean_std')
135 |
136 | # Plot best and mean FF curves for the population
137 | plt.subplots(1, 1, figsize=(8,8))
138 | plt.plot(mean_FFs, color='dodgerblue', label='Pop. avg FF')
139 | plt.plot(best_FFs, color='tomato', label='Pop. best FF')
140 | plt.xlabel('generations')
141 | plt.ylabel('FF')
142 | plt.legend()
143 | fname = fst.combineDirAndFile(self.dir, '{}_{}.png'.format('FFplot', self.datetime_str))
144 | plt.savefig(fname)
145 |
146 | plt.close()
147 |
148 | # Plot the mean and std for the champion of each generation
149 | champion_FF_mean_std = np.array(champion_FF_mean_std)
150 | champ_mean = champion_FF_mean_std[:,0]
151 | champ_std = champion_FF_mean_std[:,1]
152 | plt.fill_between(
153 | np.array(range(len(champ_mean))),
154 | champ_mean - champ_std,
155 | champ_mean + champ_std,
156 | facecolor='dodgerblue', alpha=0.5)
157 |
158 | plt.plot(champ_mean, color='mediumblue')
159 | plt.xlabel('generations')
160 | plt.ylabel('FF')
161 | fname = fst.combineDirAndFile(self.dir, '{}_{}.png'.format('champion_mean-std_plot', self.datetime_str))
162 | plt.savefig(fname)
163 |
164 | # Get an avg final score for the best individ. You know this will be the best one because
165 | # the best one is preserved after getNextGen().
166 | best_individ = self.population[0]
167 |
168 | # Save the NN of the best individ.
169 | bestNN_fname = fst.combineDirAndFile(self.dir, f'bestNN_{self.agent_class.__name__}_{self.datetime_str}')
170 | best_individ.saveNetworkToFile(fname=(bestNN_fname + '.json'))
171 | best_individ.plotNetwork(show_plot=False, save_plot=True, fname=(bestNN_fname + '.png'), node_legend=True)
172 |
173 | # Something annoying happening with showing vs recording the final runs, but I'll figure it out later.
174 | best_individ_scores = [best_individ.runEpisode(N_episode_steps,
175 | show_episode=show_final_runs,
176 | record_episode=record_final_runs,
177 | **kwargs) for i in range(N_runs_with_best)]
178 | #best_individ.agent.closeEnv()
179 | best_individ_avg_score = np.mean(best_individ_scores)
180 |
181 | # Plot some more stuff with the saved dat
182 | try:
183 | rt.plotPopulationProperty(self.dir, 'all_FFs', make_hist_gif=False)
184 | rt.plotPopulationProperty(self.dir, 'weightcounts', make_hist_gif=False)
185 |         except Exception:
186 | print('\n\n')
187 | print(tb.format_exc())
188 | print('plotPopulationProperty() failed, continuing')
189 |
190 |
191 |
192 | try:
193 | if record_final_runs:
194 | N_side = min(3, floor(sqrt(N_runs_with_best)))
195 | movie_dir = best_individ.agent.record_dir
196 | movie_combine.combineMovieFiles(path=movie_dir, grid_size=f'{N_side}x{N_side}', make_gif=True)
197 |         except Exception:
198 | print('\n\n')
199 | print(tb.format_exc())
200 | print('failed combining movies into single panel')
201 |
202 | return_dict = {}
203 | return_dict['best_FFs'] = best_FFs
204 | return_dict['mean_FFs'] = mean_FFs
205 | return_dict['best_individ_avg_score'] = best_individ_avg_score
206 |
207 | return(return_dict)
208 |
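For reference, a hedged end-to-end sketch of driving Population.evolve() (the real entry point is presumably ev1.py, which isn't shown here; the agent class and kwarg values are placeholders):

```python
import sys
sys.path.append('./classes')
from Population import Population
from CartPoleAgent import CartPoleAgent  # placeholder agent class

pop = Population(agent_class=CartPoleAgent, N_pop=20)
results = pop.evolve(N_gen=50, N_episode_steps=400, N_trials_per_agent=3)
print(results['best_individ_avg_score'])
```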
209 |
210 | def getNextGen(self, FF_list):
211 |
212 | '''
213 | This first sorts the pop by the (index, FF) list passed to it.
214 | Then it takes the best_N of these indices in order. It starts the
215 |         new_pop with a clone of the best individ from the last gen. Then it adds
216 | to new_pop by mutating the best_N until the pop is filled again.
217 |
218 | So, you can assume that for the new pop., pop[0] is the best one of the
219 | LAST generation.
220 |
221 | '''
222 |
223 | pop_indices_sorted = self.sortByFitnessFunction(FF_list)
224 | best_N = max(int(self.N_pop*self.best_N_frac), 2)
225 | #best_N = 1
226 | best_N_indices = [x[0] for x in pop_indices_sorted[:best_N]]
227 |
228 | new_pop = [self.population[best_N_indices[0]].clone()]
229 | mod_counter = 0
230 |
231 |         while len(new_pop) < self.N_pop:
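The dump truncates getNextGen() at this point and resumes mid-way through /classes/EPANN.py below. A hedged reconstruction of the loop body the docstring describes, using only methods that appear elsewhere in this repo (clone(), mutate()); this is a sketch, not the original code:

```python
# Hedged reconstruction: fill the new population by cloning and mutating
# the best_N individuals in round-robin order, then swap it in.
while len(new_pop) < self.N_pop:
    parent = self.population[best_N_indices[mod_counter % best_N]]
    child = parent.clone()
    child.mutate(std=self.gauss_std)
    new_pop.append(child)
    mod_counter += 1

self.population = new_pop
```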
--------------------------------------------------------------------------------
/classes/EPANN.py:
--------------------------------------------------------------------------------
261 |     def mutateAddNode(self):
262 |         if len(self.weights_list) > 0:
263 |             par_index, child_index = random.choice(list(self.weights_list))
264 |             self.addNodeInBetween(par_index, child_index)
265 |
266 |
267 | def mutateAddWeight(self, std=0.1):
268 | N_attempts = 4
269 | i = 0
270 | while True:
271 | if i>N_attempts:
272 | return(0)
273 | else:
274 | i += 1
275 | node_1_ind = random.choice(list(range(len(self.node_list))))
276 |
277 | # No self
278 | node_2_options = [ind for ind in range(len(self.node_list)) if ind != node_1_ind]
279 |
280 | if (node_1_ind in self.input_node_indices) or (node_1_ind == self.bias_node_index):
281 | node_2_options = [ind for ind in node_2_options if (ind not in self.input_node_indices) and (ind != self.bias_node_index)]
282 | weight_connection_options = [(node_1_ind, ind) for ind in node_2_options if ((node_1_ind, ind) not in self.weights_list)]
283 |
284 | elif node_1_ind in self.output_node_indices:
285 | node_2_options = [ind for ind in node_2_options if ind not in self.output_node_indices]
286 | weight_connection_options = [(ind, node_1_ind) for ind in node_2_options if ((ind, node_1_ind) not in self.weights_list)]
287 |
288 | else:
289 | #if it's neither an input or output
290 |
291 | # The options if node 2 is going to be the parent.
292 | node_2_weight_options_parent = [(ind, node_1_ind) for ind in node_2_options if (ind not in self.output_node_indices) and (not self.getsInputFrom(ind, node_1_ind))]
293 |
294 | # In both cases, we need to check that either node_2 is not in prop_order
295 | # (meaning it can go anywhere, provided it's not i/o), OR that
296 | # it doesn't get indirect input from ind.
297 | #
298 | # The options if node 2 is going to be the child.
299 | node_2_weight_options_child = [(node_1_ind, ind) for ind in node_2_options if ((ind not in self.input_node_indices) and (ind != self.bias_node_index)) and (not self.getsInputFrom(node_1_ind, ind))]
300 |
301 | # Combine them.
302 | weight_connection_options = node_2_weight_options_parent + node_2_weight_options_child
303 | weight_connection_options = [w for w in weight_connection_options if w not in self.weights_list]
304 |
305 | if len(weight_connection_options)==0:
306 | # If there aren't any options by this point, continue to try again
307 | continue
308 | else:
309 | weight_connection_tuple = random.choice(weight_connection_options)
310 | break
311 |
312 | self.addConnectingWeight(weight_connection_tuple, val=None, std=std)
313 |
314 |
315 | def mutateChangeWeight(self, std=0.1):
316 | if len(self.weights_list)>0:
317 | par_index, child_index = random.choice(list(self.weights_list))
318 | self.print('changing weight between {} and {}'.format(par_index, child_index))
319 | self.node_list[par_index].mutateOutputWeight(child_index, std=std)
320 |
321 |
322 | def mutateRemoveWeight(self):
323 | if len(self.weights_list)>0:
324 | par_index, child_index = random.choice(list(self.weights_list))
325 | self.print('removing weight between {} and {}'.format(par_index, child_index))
326 | self.removeConnectingWeight((par_index, child_index))
327 |
328 |
329 | def mutate(self, std=0.1):
330 |
331 | self.print('\n\nbefore mutate:')
332 | if self.verbose:
333 | self.printNetwork()
334 |
335 | if random.random() < self.node_add_chance:
336 | # Add a node by splitting an existing weight
337 | self.mutateAddNode()
338 |
339 |
340 | if random.random() < self.weight_add_chance:
341 | # Add weight between two nodes
342 | self.mutateAddWeight(std=std)
343 |
344 |
345 | if random.random() < self.weight_change_chance:
346 | # Change weight
347 | self.mutateChangeWeight(std=std)
348 |
349 |
350 | if random.random() < self.weight_remove_chance:
351 | # Remove weight
352 | self.mutateRemoveWeight()
353 |
354 |
355 | self.print('\nafter mutate:')
356 | if self.verbose:
357 | self.printNetwork()
358 |
359 |
360 |
361 |
362 | def getsInputFrom(self, n1_index, n2_index):
363 |
364 | # This is to check if n1 gets input from n2, indirectly.
365 |
366 | n1 = self.node_list[n1_index]
367 | n2 = self.node_list[n2_index]
368 | lineage_q = Queue()
369 | # You need this! Or it won't check its own parents!
370 | lineage_q.put(n1_index)
371 | [lineage_q.put(n) for n in n1.input_indices]
372 |
373 | while lineage_q.qsize() > 0:
374 |             next_ind = lineage_q.get()
375 |             if n2_index in self.node_list[next_ind].input_indices:
376 |                 return(True)
377 |             else:
378 |                 [lineage_q.put(n) for n in self.node_list[next_ind].input_indices]
379 |
380 | return(False)
381 |
382 |
383 |
384 | def propagateNodeOutput(self, node_index):
385 |
386 | # This assumes that the propagate_order list is already sorted!
387 | # If it isn't, you'll get some bad results.
388 | node = self.node_list[node_index]
389 |
390 | for target_node_index in node.getOutputIndices():
391 | self.node_list[target_node_index].addToInputsReceived(node.getValue()*node.output_weights[target_node_index])
392 |
393 |
394 | def forwardPass(self, input_vec):
395 |
396 | self.clearAllNodes()
397 |
398 | # Put the input vec into the input nodes
399 | for i, index in enumerate(self.input_node_indices):
400 | self.node_list[index].value = input_vec[i]
401 |
402 | # For each node in the sorted propagate list, propagate to its children
403 | for ind in self.propagate_order:
404 | self.propagateNodeOutput(ind)
405 |
406 | output_vec = np.array([self.node_list[ind].getValue() for ind in self.output_node_indices])
407 |
408 | if self.action_space_type == 'discrete':
409 | action = self.epsGreedyOutput(output_vec)
410 | elif self.action_space_type == 'continuous':
411 | # Need to fix if there are several cont. directions, but won't deal with that
412 | # for now. Actually, it seems like even when it's one continuous action, you're
413 | # supposed to supply it a list??
414 | action = output_vec
415 |
416 | return(action)
417 |
418 |
419 | def epsGreedyOutput(self, vec):
420 | if random.random() < self.epsilon:
421 | return(random.randint(0, len(vec)-1))
422 | else:
423 | return(self.greedyOutput(vec))
424 |
425 |
426 | def greedyOutput(self, vec):
427 | return(np.argmax(vec))
428 |
429 |
430 |     def softmaxOutput(self, vec):
431 |         a = np.array(vec)
432 |         a = np.exp(a - np.max(a)) # Subtract the max first for numerical stability.
433 |         a = a/np.sum(a)
434 |         return(np.random.choice(len(a), p=a))
435 |
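A quick standalone check of the three output rules above on a toy vector (values arbitrary); the softmax here matches softmaxOutput()'s max-subtracted form:

```python
import numpy as np

vec = np.array([0.1, 2.0, -0.5, 0.4])
print(np.argmax(vec))                        # greedy choice -> 1
p = np.exp(vec - np.max(vec)); p /= p.sum()  # numerically stable softmax
print(np.random.choice(len(p), p=p))         # softmax-sampled action index
```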
436 |
437 |
438 | def setMaxEpisodeSteps(self, N_steps):
439 | self.agent.setMaxEpisodeSteps(N_steps)
440 |
441 |
442 | def clearAllNodes(self):
443 | [n.clearNode() for i,n in enumerate(self.node_list) if i!=self.bias_node_index]
444 |
445 |
446 | def runEpisode(self, N_steps, **kwargs):
447 |
448 |
449 | R_tot = 0
450 | Rs = []
451 |
452 | show_episode = kwargs.get('show_episode', False)
453 | record_episode = kwargs.get('record_episode', False)
454 |
455 | if show_episode:
456 | self.createFig()
457 |
458 | if record_episode:
459 | self.agent.setMonitorOn(show_run=show_episode)
460 |
461 | self.agent.initEpisode()
462 |
463 | for i in range(N_steps):
464 | self.clearAllNodes()
465 |
466 |             if i % max(1, N_steps//10) == 0: # max() guards against modulo-by-zero when N_steps < 10.
467 | self.print('R_tot = {:.3f}'.format(R_tot))
468 |
469 |
470 | s = self.agent.getStateVec()
471 | a = self.forwardPass(s)
472 | self.print('s = {}, a = {}'.format(s, a))
473 |
474 | r, s, done = self.agent.iterate(a)
475 |
476 | R_tot += r
477 | Rs.append(R_tot)
478 |
479 | if done:
480 | #return(R_tot)
481 | break
482 |
483 | if show_episode or record_episode:
484 | if self.render_type == 'matplotlib':
485 | self.agent.drawState(self.axes[0])
486 | self.axes[1].clear()
487 | self.axes[1].plot(Rs)
488 | self.fig.canvas.draw()
489 | elif self.render_type == 'gym':
490 | self.agent.drawState()
491 |
492 |
493 | if record_episode:
494 | print('R_tot = {:.3f}'.format(R_tot))
495 |
496 | self.print('R_tot/N_steps = {:.3f}'.format(R_tot/N_steps))
497 |
498 | return(R_tot)
499 |
500 |
501 |
502 |
503 | def gaussMutate(self, std=0.1):
504 | # This mutates ALL of a node's output weights!
505 | for n in self.node_list:
506 | for w in n.getOutputIndices():
507 | n.output_weights[w] += np.random.normal(scale=std)
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 | def clone(self):
517 | clone = deepcopy(self)
518 | return(clone)
519 |
520 |
521 | def createFig(self):
522 | if self.render_type == 'matplotlib':
523 | self.fig, self.axes = plt.subplots(1,2, figsize=(16,8))
524 | plt.show(block=False)
525 |
526 |
527 |
528 |
529 | def print(self, str):
530 |
531 | if self.verbose:
532 | print(str)
533 |
534 |
535 |
536 | def printNetwork(self):
537 | print('\n')
538 | for i, n in enumerate(self.node_list):
539 | print('\nnode ', i)
540 | print('input indices:', n.input_indices)
541 | print('output indices: ', n.getOutputIndices())
542 | print('output weights: ', n.getOutputWeightStr())
543 |
544 | print()
545 |
546 |
547 | def plotNetwork(self, show_plot=True, save_plot=False, fname=None, node_legend=False):
548 |
549 | fig, ax = plt.subplots(1, 1, figsize=(12,8))
550 | DG = nx.DiGraph()
551 |
552 | other_node_indices = [i for i,n in enumerate(self.node_list) if ((i not in self.input_node_indices) and (i not in self.output_node_indices) and (i != self.bias_node_index))]
553 |
554 | DG.add_node(self.bias_node_index)
555 |
556 | for i in self.input_node_indices:
557 | DG.add_node(i)
558 |
559 | for i in self.output_node_indices:
560 | DG.add_node(i)
561 |
562 | # I think you have to add this, because if you have a node that doesn't have any connections
563 | # and it's not I/O/B, then it will never get entered into DG without this.
564 | for i in other_node_indices:
565 | DG.add_node(i)
566 |
567 | for n in self.node_list:
568 | for o in n.getOutputIndices():
569 | DG.add_edges_from([(n.node_index, o)])
570 |
571 | pos = nx.drawing.nx_agraph.graphviz_layout(DG, prog='dot')
572 |
573 | try:
574 | nx.draw_networkx_nodes(DG, nodelist=self.input_node_indices, pos=pos, node_color='lightgreen', node_size=600)
575 | nx.draw_networkx_nodes(DG, nodelist=self.output_node_indices, pos=pos, node_color='orange', node_size=600)
576 | nx.draw_networkx_nodes(DG, nodelist=[self.bias_node_index], pos=pos, node_color='forestgreen', node_size=600)
577 | nx.draw_networkx_nodes(DG, nodelist=other_node_indices, pos=pos, node_color='plum', node_size=600)
578 |         except Exception:
579 | print('problem drawing nx nodes. pos:')
580 | print(pos)
581 | exit()
582 |
583 | for w in self.weights_list:
584 | weight = self.node_list[w[0]].output_weights[w[1]]
585 | if weight < 0:
586 | nx.draw_networkx_edges(DG, pos=pos, edgelist=[w], width=4.0, alpha=min(abs(weight), 1), edge_color='tomato')
587 |
588 | if weight >= 0:
589 | nx.draw_networkx_edges(DG, pos=pos, edgelist=[w], width=4.0, alpha=min(abs(weight), 1), edge_color='dodgerblue')
590 |
591 | labels = {i:str(i) for i in range(len(self.node_list))}
592 | nx.draw_networkx_labels(DG, pos=pos, labels=labels, font_size=14)
593 | edge_labels = {w:'{:.2f}'.format(self.node_list[w[0]].output_weights[w[1]]) for w in self.weights_list}
594 | nx.draw_networkx_edge_labels(DG, pos=pos, edge_labels=edge_labels, font_size=10, bbox={'alpha':0.2, 'pad':0.0}, label_pos=0.85)
595 |
596 | plt.xticks([])
597 | plt.yticks([])
598 | plt.subplots_adjust(left=.2, bottom=0, right=1, top=1, wspace=1, hspace=0)
599 | ax.axis('off')
600 |
601 | if node_legend:
602 | if (self.agent.state_labels is not None) and (self.agent.action_labels is not None):
603 | props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
604 |
605 | percent_offset = 0.02
606 |
607 | bias_str = 'Bias: node {}\n\n'.format(self.bias_node_index)
608 | input_str = bias_str + 'Inputs:\n\n' + '\n'.join(['node {} = {}'.format(ind, self.agent.state_labels[i]) for i, ind in enumerate(self.input_node_indices)])
609 | ax.text(-percent_offset, (1-3*percent_offset), input_str, transform=ax.transAxes, fontsize=10, verticalalignment='top', horizontalalignment='right', bbox=props)
610 |
611 | output_str = 'Outputs:\n\n' + '\n'.join(['node {} = {}'.format(ind, self.agent.action_labels[i]) for i, ind in enumerate(self.output_node_indices)])
612 | ax.text(-percent_offset, 3*percent_offset, output_str, transform=ax.transAxes, fontsize=10, verticalalignment='bottom', horizontalalignment='right', bbox=props)
613 | textstr = input_str + '\n\n' + output_str
614 |
615 |
616 | # place a text box in upper left in axes coords
617 |
618 | if save_plot:
619 | if fname is not None:
620 | plt.savefig(fname)
621 | else:
622 | default_fname = 'misc_runs/{}_NN_{}.png'.format(self.agent_class.__name__, fst.getDateString())
623 | plt.savefig(default_fname)
624 |
625 | if show_plot:
626 | plt.show()
627 |
628 | plt.close()
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 | '''
646 | SCRAP
647 |
648 |
649 |
650 |
651 |
652 |
653 | # When finished, so all output nodes should be full
654 | self.print('\n\nProp. done, output node values:')
655 | for ind in self.output_node_indices:
656 | self.print('Node {} output: {:.3f}'.format(ind, self.node_list[ind].value))
657 |
658 |
659 |
660 |
661 | if par_index != self.bias_node_index:
662 | self.node_list[self.bias_node_index].addToOutputWeights(new_node.node_index)
663 | self.node_list[self.bias_node_index].output_weights[new_node.node_index] = 0
664 | new_node.addToInputIndices(self.bias_node_index)
665 |
666 |
667 |
668 | # Bias node
669 | bias_node = Node(len(self.node_list))
670 | bias_node.setToBiasNode()
671 | #bias_node.setOutputIndices(self.output_node_indices)
672 | self.node_list.append(bias_node)
673 |
674 |
675 | # Add input nodes
676 | for i in range(self.N_inputs):
677 | new_node = Node(len(self.node_list))
678 | new_node.setOutputIndices(self.output_node_indices)
679 | #uself.weights_list.append()
680 | new_node.setRandomOutputWeights()
681 | new_node.setToInputNode()
682 | self.node_list.append(new_node)
683 |
684 |
685 | # Add output nodes
686 | for i in range(self.N_total_outputs):
687 | new_node = Node(len(self.node_list))
688 | new_node.setInputIndices(self.input_node_indices)
689 | #new_node.addToInputIndices(self.bias_node_index)
690 | new_node.setToOutputNode()
691 | self.node_list.append(new_node)
692 |
693 | # Add hidden layer nodes
694 | for i in range(self.N_init_hidden_nodes):
695 | new_node = Node(len(self.node_list))
696 | new_node.setInputIndices(self.input_node_indices)
697 | #new_node.addToInputIndices(self.bias_node_index)
698 | new_node.setOutputIndices(self.output_node_indices)
699 | new_node.setRandomOutputWeights()
700 |
701 | #self.node_list[self.bias_node_index].addToOutputWeights(new_node.node_index)
702 |
703 | for ii in self.input_node_indices:
704 | self.node_list[ii].addToOutputWeights(new_node.node_index)
705 |
706 | for o in self.output_node_indices:
707 | self.node_list[o].addToInputIndices(new_node.node_index)
708 |
709 | self.node_list.append(new_node)
710 |
711 | # Set initial random output weight
712 | for i, n in enumerate(self.node_list):
713 | N_incoming_connect = n.getNInputs()
714 | for j in n.input_indices:
715 | self.node_list[j].output_weights[i] = np.random.normal(scale=(1.0/N_incoming_connect))
716 |
717 | # Set all the bias weights to 0 to start.
718 | for i in self.node_list[self.bias_node_index].getOutputIndices():
719 | self.node_list[self.bias_node_index].output_weights[i] = 0
720 |
721 | self.node_list[self.bias_node_index].value = 1
722 |
723 |
724 |
725 | if (node_1_ind not in self.propagate_order) and not ():
726 | # This one is easy: if it's not in propagate_order, then it's not connected to anything else,
727 | # so we can attach it to any other.
728 | node_2_ind = random.choice(node_2_options)
729 | if (node_2_ind in self.input_node_indices) or node_2_ind == self.bias_node_index:
730 | weight_connection_tuple = (node_2_ind, node_1_ind)
731 |
732 | elif node_2_ind in self.output_node_indices:
733 | weight_connection_tuple = (node_1_ind, node_2_ind)
734 |
735 | else:
736 | if random.random() < 0.5:
737 | weight_connection_tuple = (node_1_ind, node_2_ind)
738 | else:
739 | weight_connection_tuple = (node_2_ind, node_1_ind)
740 |
741 | break
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 | '''
756 |
757 |
758 | #
759 |
--------------------------------------------------------------------------------