├── 01-basics
│   └── linear_regression
├── A3C-DDPG.py
├── PER-and-ACQ
│   ├── AC-DQN.py
│   ├── net.py
│   ├── prioritized-DQN.py
│   ├── prioritized_memory.py
│   └── util.py
├── README.md
├── convolutional_neural_network
│   └── main.py
├── ddpg.py
├── double-DQN.py
├── dueling-DQN.py
├── feedforward_neural_network
│   └── main.py
├── ga
│   ├── bag.py
│   └── peak.py
├── gym_sample
│   └── demo.py
├── native-Qlearning.py
├── nature-DQN.py
├── neat
│   ├── Digraph.gv
│   ├── Digraph.gv.svg
│   ├── avg_fitness.svg
│   ├── cartpole.py
│   ├── config
│   ├── speciation.svg
│   └── visualize.py
├── net.py
├── nips-DQN.py
├── recurrent_neural_network
│   └── main.py
└── util.py
/01-basics/linear_regression: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | # Hyper-parameters 8 | input_size = 1 9 | output_size = 1 10 | num_epochs = 60 11 | learning_rate = 0.001 12 | 13 | # Toy dataset 14 | x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168], 15 | [9.779], [6.182], [7.59], [2.167], [7.042], 16 | [10.791], [5.313], [7.997], [3.1]], dtype=np.float32) 17 | 18 | y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573], 19 | [3.366], [2.596], [2.53], [1.221], [2.827], 20 | [3.465], [1.65], [2.904], [1.3]], dtype=np.float32) 21 | 22 | # Linear regression model 23 | model = nn.Linear(input_size, output_size) 24 | 25 | # Loss and optimizer 26 | criterion = nn.MSELoss() 27 | optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 28 | 29 | # Train the model 30 | for epoch in range(num_epochs): 31 | # Convert numpy arrays to torch tensors 32 | inputs = torch.from_numpy(x_train) 33 | targets = torch.from_numpy(y_train) 34 | 35 | # Forward pass 36 | outputs = model(inputs) 37 | loss = criterion(outputs, targets) 38 | 39 | # Backward and optimize 40 | optimizer.zero_grad() 41 | loss.backward() 42 | optimizer.step() 43 | 44 | if (epoch+1) % 5 == 0: 45 | print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item())) 46 | 47 | # Plot the graph 48 | predicted = model(torch.from_numpy(x_train)).detach().numpy() 49 | plt.plot(x_train, y_train, 'ro', label='Original data') 50 | plt.plot(x_train, predicted, label='Fitted line') 51 | plt.legend() 52 | plt.show() 53 | 54 | # Save the model checkpoint 55 | torch.save(model.state_dict(), 'model.ckpt') 56 | -------------------------------------------------------------------------------- /A3C-DDPG.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch, time, gym, argparse, sys 3 | import numpy as np 4 | from scipy.signal import lfilter 5 | from scipy.misc import imresize 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.multiprocessing as mp 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--env', default='Breakout-v4', type=str, help='gym environment') 12 | parser.add_argument('--processes', default=1, type=int, help='number of processes') 13 | parser.add_argument('--lr', default=1e-4, type=float, help='learning rate') 14 | parser.add_argument('--gamma', default=0.99, type=float, help='rewards discount factor') 15 | parser.add_argument('--seed', default=1, type=int, help='random seed') 16 | args = parser.parse_args() 17 | discount = lambda x, gamma: lfilter([1], [1, -gamma], x[::-1])[::-1] 18 | prepro = lambda img: imresize(img[35:195].mean(2), (80, 80)).astype(np.float32).reshape(1, 80, 80) / 255. 
19 | 20 | 21 | class NNPolicy(nn.Module): 22 | def __init__(self, num_actions): 23 | super(NNPolicy, self).__init__() 24 | self.conv1 = nn.Conv2d(1, 32, 3, stride=2, padding=1) 25 | self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 26 | self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 27 | self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 28 | self.gru = nn.GRUCell(32 * 5 * 5, 256) 29 | self.critic_net, self.actor_net = nn.Linear(256, 1), nn.Linear(256, num_actions) 30 | 31 | def forward(self, inputs, train=True, hard=False): 32 | inputs, hx = inputs 33 | x = F.elu(self.conv1(inputs)) 34 | x = F.elu(self.conv2(x)) 35 | x = F.elu(self.conv3(x)) 36 | x = F.elu(self.conv4(x)) 37 | hx = self.gru(x.view(-1, 32 * 5 * 5), (hx)) 38 | return self.critic_net(hx), self.actor_net(hx), hx 39 | 40 | 41 | class SharedAdam(torch.optim.Adam): 42 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): 43 | super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay) 44 | for group in self.param_groups: 45 | for p in group['params']: 46 | state = self.state[p] 47 | state['shared_steps'], state['step'] = torch.zeros(1).share_memory_(), 0 48 | state['exp_avg'] = p.data.new().resize_as_(p.data).zero_().share_memory_() 49 | state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_().share_memory_() 50 | 51 | 52 | def loss_func(args, values, logps, actions, rewards): 53 | np_values = values.view(-1).data.numpy() 54 | 55 | delta_t = np.asarray(rewards) + args.gamma * np_values[1:] - np_values[:-1] 56 | logpys = logps.gather(1, torch.tensor(actions).view(-1, 1)) 57 | gen_adv_est = discount(delta_t, args.gamma) 58 | policy_loss = -(logpys.view(-1) * torch.FloatTensor(gen_adv_est.copy())).sum() 59 | 60 | rewards[-1] += args.gamma * np_values[-1] 61 | discounted_r = discount(np.asarray(rewards), args.gamma) 62 | discounted_r = torch.tensor(discounted_r.copy(), dtype=torch.float32) 63 | value_loss = .5 * (discounted_r - values[:-1, 0]).pow(2).sum() 64 | 65 | entropy_loss = -(-logps * torch.exp(logps)).sum() 66 | return policy_loss + 0.5 * value_loss + 0.01 * entropy_loss 67 | 68 | 69 | def worker(shared_model, shared_optimizer, rank, args, info): 70 | env = gym.make(args.env) 71 | env.seed(args.seed + rank) 72 | torch.manual_seed(args.seed + rank) 73 | model = NNPolicy(num_actions=args.num_actions) 74 | state = torch.tensor(prepro(env.reset())) 75 | 76 | start_time = last_disp_time = time.time() 77 | episode_length, epr, eploss, done = 0, 0, 0, True 78 | 79 | while info['frames'][0] <= 4e7: 80 | model.load_state_dict(shared_model.state_dict()) 81 | 82 | hx = torch.zeros(1, 256) if done else hx.detach() 83 | values, logps, actions, rewards = [], [], [], [] 84 | 85 | for step in range(4): 86 | episode_length += 1 87 | value, logit, hx = model((state.view(1, 1, 80, 80), hx)) 88 | logp = F.log_softmax(logit, dim=-1) 89 | 90 | action = torch.exp(logp).multinomial(num_samples=1).data[0] 91 | state, reward, done, _ = env.step(action.numpy()[0]) 92 | # env.render() 93 | 94 | state = torch.tensor(prepro(state)) 95 | epr += reward 96 | reward = np.clip(reward, -1, 1) 97 | done = done or episode_length >= 1e4 98 | 99 | info['frames'].add_(1) 100 | num_frames = int(info['frames'].item()) 101 | 102 | if done: 103 | info['episodes'] += 1 104 | interp = 1 if info['episodes'][0] == 1 else 0.01 105 | info['run_epr'].mul_(1 - interp).add_(interp * epr) 106 | info['run_loss'].mul_(1 - interp).add_(interp * eploss) 107 | 108 | if rank == 0 and time.time() - last_disp_time > 60: 109 
| elapsed = time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)) 110 | print('time {}, episodes {:.0f}, frames {:.1f}M, mean epr {:.2f}, run loss {:.2f}' 111 | .format(elapsed, info['episodes'].item(), num_frames / 1e6, 112 | info['run_epr'].item(), info['run_loss'].item())) 113 | last_disp_time = time.time() 114 | 115 | if done: 116 | episode_length, epr, eploss = 0, 0, 0 117 | state = torch.tensor(prepro(env.reset())) 118 | 119 | values.append(value) 120 | logps.append(logp) 121 | actions.append(action) 122 | rewards.append(reward) 123 | 124 | next_value = torch.zeros(1, 1) if done else model((state.unsqueeze(0), hx))[0] 125 | values.append(next_value.detach()) 126 | 127 | loss = loss_func(args, torch.cat(values), torch.cat(logps), torch.cat(actions), np.asarray(rewards)) 128 | eploss += loss.item() 129 | shared_optimizer.zero_grad() 130 | loss.backward() 131 | torch.nn.utils.clip_grad_norm_(model.parameters(), 40) 132 | 133 | for param, shared_param in zip(model.parameters(), shared_model.parameters()): 134 | if shared_param.grad is None: 135 | shared_param._grad = param.grad 136 | shared_optimizer.step() 137 | 138 | 139 | if __name__ == "__main__": 140 | if sys.version_info[0] > 2: 141 | mp.set_start_method('spawn') 142 | elif sys.platform == 'linux' or sys.platform == 'linux2': 143 | raise "Must be using Python 3 with linux! Or else you get a deadlock in conv2d" 144 | 145 | args.num_actions = gym.make(args.env).action_space.n 146 | 147 | torch.manual_seed(args.seed) 148 | shared_model = NNPolicy(num_actions=args.num_actions).share_memory() 149 | shared_optimizer = SharedAdam(shared_model.parameters(), lr=args.lr) 150 | 151 | info = {k: torch.DoubleTensor([0]).share_memory_() for k in ['run_epr', 'run_loss', 'episodes', 'frames']} 152 | 153 | processes = [] 154 | for rank in range(args.processes): 155 | p = mp.Process(target=worker, args=(shared_model, shared_optimizer, rank, args, info)) 156 | p.start() 157 | processes.append(p) 158 | for p in processes: p.join() 159 | -------------------------------------------------------------------------------- /PER-and-ACQ/AC-DQN.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch, time, gym, argparse, sys 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.multiprocessing as mp 7 | 8 | from collections import deque 9 | import random 10 | from net import AtariNet 11 | from util import preprocess 12 | 13 | LR = 0.001 14 | EXPLORE = 1000000 15 | GAMMA = 0.99 16 | N_STEP = 4 17 | ENV = 'Pong-v0' 18 | ACTIONS_SIZE = gym.make(ENV).action_space.n 19 | PROCESSES = 1 20 | SEED = 1 21 | 22 | 23 | class Agent(object): 24 | def __init__(self, action_size): 25 | self.action_size = action_size 26 | self.EPSILON = 1.0 27 | self.network = AtariNet(action_size) 28 | self.memory = deque() 29 | self.optimizer = torch.optim.Adam(self.network.parameters(), lr=LR) 30 | self.loss_func = nn.MSELoss() 31 | 32 | def action(self, state, israndom): 33 | if israndom and random.random() < self.EPSILON: 34 | return np.random.randint(0, self.action_size) 35 | state = torch.unsqueeze(torch.FloatTensor(state), 0) 36 | actions_value = self.network.forward(state) 37 | return torch.max(actions_value, 1)[1].data.numpy()[0] 38 | 39 | def add(self, state, action, reward, next_state, done): 40 | if done: 41 | self.memory.append((state, action, reward, next_state, 0)) 42 | else: 43 | self.memory.append((state, action, reward, 
next_state, 1)) 44 | 45 | def learn(self, shared_optimizer, shared_model): 46 | batch_size = len(self.memory) 47 | batch = random.sample(self.memory, batch_size) 48 | state = torch.FloatTensor([x[0] for x in batch]) 49 | action = torch.LongTensor([[x[1]] for x in batch]) 50 | reward = torch.FloatTensor([[x[2]] for x in batch]) 51 | next_state = torch.FloatTensor([x[3] for x in batch]) 52 | done = torch.FloatTensor([[x[4]] for x in batch]) 53 | 54 | eval_q = self.network.forward(state).gather(1, action) 55 | next_q = self.network(next_state).detach() 56 | target_q = reward + GAMMA * next_q.max(1)[0].view(batch_size, 1) * done 57 | loss = self.loss_func(eval_q, target_q) 58 | 59 | shared_optimizer.zero_grad() 60 | loss.backward() 61 | for param, shared_param in zip(self.network.parameters(), shared_model.parameters()): 62 | if shared_param.grad is None: 63 | shared_param._grad = param.grad 64 | shared_optimizer.step() 65 | 66 | self.memory = deque() 67 | if self.EPSILON > 0.1: 68 | self.EPSILON -= (1.0 - 0.1) / EXPLORE 69 | 70 | 71 | class SharedAdam(torch.optim.Adam): 72 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): 73 | super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay) 74 | for group in self.param_groups: 75 | for p in group['params']: 76 | state = self.state[p] 77 | state['shared_steps'], state['step'] = torch.zeros(1).share_memory_(), 0 78 | state['exp_avg'] = p.data.new().resize_as_(p.data).zero_().share_memory_() 79 | state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_().share_memory_() 80 | 81 | 82 | def worker(shared_model, shared_optimizer, rank, info): 83 | env = gym.make(ENV) 84 | env.seed(SEED + rank) 85 | torch.manual_seed(SEED + rank) 86 | agent = Agent(ACTIONS_SIZE) 87 | 88 | start_time = last_disp_time = time.time() 89 | episode_length, epr = 0, 0 90 | 91 | state = env.reset() 92 | state = preprocess(state) 93 | while info['frames'][0] <= 4e7: 94 | agent.network.load_state_dict(shared_model.state_dict()) 95 | 96 | for _ in range(N_STEP): 97 | # env.render() 98 | episode_length += 1 99 | 100 | action = agent.action(state, True) 101 | next_state, reward, done, ext = env.step(action) 102 | epr += reward 103 | done = done or episode_length >= 1e4 104 | info['frames'].add_(1) 105 | num_frames = int(info['frames'].item()) 106 | 107 | next_state = preprocess(next_state) 108 | agent.add(state, action, reward, next_state, done) 109 | 110 | state = next_state 111 | 112 | if done: 113 | info['episodes'] += 1 114 | interp = 1 if info['episodes'][0] == 1 else 0.01 115 | info['run_epr'].mul_(1 - interp).add_(interp * epr) 116 | 117 | if rank == 0 and time.time() - last_disp_time > 60: 118 | elapsed = time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)) 119 | print('time {}, episodes {:.0f}, frames {:.1f}M, mean epr {:.2f}' 120 | .format(elapsed, info['episodes'].item(), num_frames / 1e6, 121 | info['run_epr'].item())) 122 | last_disp_time = time.time() 123 | 124 | if done: 125 | episode_length, epr, eploss = 0, 0, 0 126 | state = env.reset() 127 | state = preprocess(state) 128 | break 129 | 130 | agent.learn(shared_optimizer, shared_model) 131 | 132 | 133 | 134 | if __name__ == "__main__": 135 | if sys.version_info[0] > 2: 136 | mp.set_start_method('spawn') 137 | elif sys.platform == 'linux' or sys.platform == 'linux2': 138 | raise "Must be using Python 3 with linux! 
Or else you get a deadlock in conv2d" 139 | 140 | torch.manual_seed(SEED) 141 | shared_model = AtariNet(ACTIONS_SIZE).share_memory() 142 | shared_optimizer = SharedAdam(shared_model.parameters(), lr=LR) 143 | 144 | info = {k: torch.DoubleTensor([0]).share_memory_() for k in ['run_epr', 'episodes', 'frames']} 145 | 146 | processes = [] 147 | for rank in range(PROCESSES): 148 | p = mp.Process(target=worker, args=(shared_model, shared_optimizer, rank, info)) 149 | p.start() 150 | processes.append(p) 151 | for p in processes: p.join() 152 | -------------------------------------------------------------------------------- /PER-and-ACQ/net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class AtariNet(nn.Module): 6 | 7 | def __init__(self, num_actions): 8 | super(AtariNet, self).__init__() 9 | self.conv1 = nn.Sequential( 10 | nn.Conv2d(1, 32, kernel_size=8, stride=4), 11 | nn.ReLU() 12 | ) 13 | self.conv2 = nn.Sequential( 14 | nn.Conv2d(32, 64, kernel_size=4, stride=2), 15 | nn.ReLU() 16 | ) 17 | self.conv3 = nn.Sequential( 18 | nn.Conv2d(64, 64, kernel_size=3, stride=1), 19 | nn.ReLU() 20 | ) 21 | self.hidden = nn.Sequential( 22 | nn.Linear(64 * 7 * 7, 512, bias=True), 23 | nn.ReLU() 24 | ) 25 | self.out = nn.Sequential( 26 | nn.Linear(512, num_actions, bias=True) 27 | ) 28 | self.apply(self.init_weights) 29 | 30 | def init_weights(self, m): 31 | if type(m) == nn.Conv2d: 32 | m.weight.data.normal_(0.0, 0.02) 33 | if type(m) == nn.Linear: 34 | torch.nn.init.xavier_uniform_(m.weight) 35 | m.bias.data.fill_(0.01) 36 | 37 | def forward(self, x): 38 | x = self.conv1(x) 39 | x = self.conv2(x) 40 | x = self.conv3(x) 41 | x = x.view(x.size(0), -1) 42 | x = self.hidden(x) 43 | x = self.out(x) 44 | return x 45 | 46 | 47 | class CnnDQN(nn.Module): 48 | def __init__(self, inputs_shape, num_actions): 49 | super(CnnDQN, self).__init__() 50 | 51 | self.inut_shape = inputs_shape 52 | self.num_actions = num_actions 53 | 54 | self.features = nn.Sequential( 55 | nn.Conv2d(inputs_shape[0], 32, kernel_size=8, stride=4), 56 | nn.ReLU(), 57 | nn.Conv2d(32, 64, kernel_size=4, stride=2), 58 | nn.ReLU(), 59 | nn.Conv2d(64, 64, kernel_size=3, stride=1), 60 | nn.ReLU() 61 | ) 62 | 63 | self.fc = nn.Sequential( 64 | nn.Linear(self.features_size(), 512), 65 | nn.ReLU(), 66 | nn.Linear(512, self.num_actions) 67 | ) 68 | 69 | def forward(self, x): 70 | x = self.features(x) 71 | x = x.view(x.size(0), -1) 72 | x = self.fc(x) 73 | return x 74 | 75 | def features_size(self): 76 | return self.features(torch.zeros(1, *self.inut_shape)).view(1, -1).size(1) 77 | -------------------------------------------------------------------------------- /PER-and-ACQ/prioritized-DQN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from prioritized_memory import Memory 4 | import numpy as np 5 | import gym 6 | import random 7 | from net import AtariNet 8 | from util import preprocess 9 | 10 | BATCH_SIZE = 32 11 | LR = 0.001 12 | START_EPSILON = 1.0 13 | FINAL_EPSILON = 0.1 14 | EPSILON = START_EPSILON 15 | EXPLORE = 1000000 16 | GAMMA = 0.99 17 | TOTAL_EPISODES = 10000000 18 | MEMORY_SIZE = 1000000 19 | MEMORY_THRESHOLD = 100000 20 | UPDATE_TIME = 10000 21 | TEST_FREQUENCY = 1000 22 | env = gym.make('Pong-v0') 23 | env = env.unwrapped 24 | ACTIONS_SIZE = env.action_space.n 25 | 26 | 27 | class Agent(object): 28 | def __init__(self): 29 | self.network, self.target_network = 
AtariNet(ACTIONS_SIZE), AtariNet(ACTIONS_SIZE) 30 | self.memory = Memory(MEMORY_SIZE) 31 | self.learning_count = 0 32 | self.optimizer = torch.optim.Adam(self.network.parameters(), lr=LR) 33 | self.loss_func = nn.MSELoss() 34 | 35 | def action(self, state, israndom): 36 | if israndom and random.random() < EPSILON: 37 | return np.random.randint(0, ACTIONS_SIZE) 38 | state = torch.unsqueeze(torch.FloatTensor(state), 0) 39 | actions_value = self.network.forward(state) 40 | return torch.max(actions_value, 1)[1].data.numpy()[0] 41 | 42 | def learn(self, state, action, reward, next_state, done): 43 | old_val = self.network.forward(torch.FloatTensor([state])).gather(1, torch.LongTensor([[action]]))[0] 44 | target_val = self.network.forward(torch.FloatTensor([state])) 45 | if done: 46 | done = 0 47 | target = reward 48 | else: 49 | done = 1 50 | target = reward + GAMMA * torch.max(target_val) 51 | error = abs(old_val[0] - target) 52 | self.memory.add(error.data, (state, action, reward, next_state, done)) 53 | if self.memory.tree.n_entries < MEMORY_THRESHOLD: 54 | return 55 | 56 | if self.learning_count % UPDATE_TIME == 0: 57 | self.target_network.load_state_dict(self.network.state_dict()) 58 | self.learning_count += 1 59 | 60 | batch, idxs, is_weights = self.memory.sample(BATCH_SIZE) 61 | state = torch.FloatTensor([x[0] for x in batch]) 62 | action = torch.LongTensor([[x[1]] for x in batch]) 63 | reward = torch.FloatTensor([[x[2]] for x in batch]) 64 | next_state = torch.FloatTensor([x[3] for x in batch]) 65 | done = torch.FloatTensor([[x[4]] for x in batch]) 66 | 67 | eval_q = self.network.forward(state).gather(1, action) 68 | next_q = self.target_network(next_state).detach() 69 | target_q = reward + GAMMA * next_q.max(1)[0].view(BATCH_SIZE, 1) * done 70 | errors = torch.abs(eval_q - target_q).data.numpy().flatten() 71 | loss = self.loss_func(eval_q, target_q) 72 | 73 | for i in range(BATCH_SIZE): 74 | idx = idxs[i] 75 | self.memory.update(idx, errors[i]) 76 | 77 | self.optimizer.zero_grad() 78 | loss.backward() 79 | self.optimizer.step() 80 | 81 | 82 | agent = Agent() 83 | 84 | for i_episode in range(TOTAL_EPISODES): 85 | state = env.reset() 86 | state = preprocess(state) 87 | while True: 88 | # env.render() 89 | action = agent.action(state, True) 90 | next_state, reward, done, info = env.step(action) 91 | next_state = preprocess(next_state) 92 | agent.learn(state, action, reward, next_state, done) 93 | 94 | state = next_state 95 | if done: 96 | break 97 | if EPSILON > FINAL_EPSILON: 98 | EPSILON -= (START_EPSILON - FINAL_EPSILON) / EXPLORE 99 | 100 | # TEST 101 | if i_episode % TEST_FREQUENCY == 0: 102 | state = env.reset() 103 | state = preprocess(state) 104 | total_reward = 0 105 | while True: 106 | # env.render() 107 | action = agent.action(state, israndom=False) 108 | next_state, reward, done, info = env.step(action) 109 | next_state = preprocess(next_state) 110 | 111 | total_reward += reward 112 | 113 | state = next_state 114 | if done: 115 | break 116 | print('episode: {} , total_reward: {}'.format(i_episode, round(total_reward, 3))) 117 | 118 | env.close() 119 | -------------------------------------------------------------------------------- /PER-and-ACQ/prioritized_memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | class SumTree: 6 | write = 0 7 | 8 | def __init__(self, capacity): 9 | self.capacity = capacity 10 | self.tree = np.zeros(2 * capacity - 1) 11 | self.data = np.zeros(capacity, 
dtype=object) 12 | self.n_entries = 0 13 | 14 | def _propagate(self, idx, change): 15 | parent = (idx - 1) // 2 16 | self.tree[parent] += change 17 | if parent != 0: 18 | self._propagate(parent, change) 19 | 20 | def _retrieve(self, idx, s): 21 | left = 2 * idx + 1 22 | right = left + 1 23 | if left >= len(self.tree): 24 | return idx 25 | 26 | if s <= self.tree[left]: 27 | return self._retrieve(left, s) 28 | else: 29 | return self._retrieve(right, s - self.tree[left]) 30 | 31 | def total(self): 32 | return self.tree[0] 33 | 34 | def add(self, p, data): 35 | idx = self.write + self.capacity - 1 36 | self.data[self.write] = data 37 | self.update(idx, p) 38 | self.write += 1 39 | if self.write >= self.capacity: 40 | self.write = 0 41 | 42 | if self.n_entries < self.capacity: 43 | self.n_entries += 1 44 | 45 | def update(self, idx, p): 46 | change = p - self.tree[idx] 47 | self.tree[idx] = p 48 | self._propagate(idx, change) 49 | 50 | def get(self, s): 51 | idx = self._retrieve(0, s) 52 | dataIdx = idx - self.capacity + 1 53 | return (idx, self.tree[idx], self.data[dataIdx]) 54 | 55 | 56 | class Memory: 57 | e = 0.01 58 | a = 0.6 59 | beta = 0.4 60 | beta_increment_per_sampling = 0.001 61 | 62 | def __init__(self, capacity): 63 | self.tree = SumTree(capacity) 64 | self.capacity = capacity 65 | 66 | def _get_priority(self, error): 67 | return (error + self.e) ** self.a 68 | 69 | def add(self, error, sample): 70 | p = self._get_priority(error) 71 | self.tree.add(p, sample) 72 | 73 | def sample(self, n): 74 | batch = [] 75 | idxs = [] 76 | segment = self.tree.total() / n 77 | priorities = [] 78 | self.beta = np.min([1., self.beta + self.beta_increment_per_sampling]) 79 | for i in range(n): 80 | a = segment * i 81 | b = segment * (i + 1) 82 | s = random.uniform(a, b) 83 | (idx, p, data) = self.tree.get(s) 84 | priorities.append(p) 85 | batch.append(data) 86 | idxs.append(idx) 87 | sampling_probabilities = priorities / self.tree.total() 88 | is_weight = np.power(self.tree.n_entries * sampling_probabilities, -self.beta) 89 | is_weight /= is_weight.max() 90 | return batch, idxs, is_weight 91 | 92 | def update(self, idx, error): 93 | p = self._get_priority(error) 94 | self.tree.update(idx, p) -------------------------------------------------------------------------------- /PER-and-ACQ/util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def preprocess(observation): 6 | """ 7 | image preprocess 8 | :param observation: 9 | :return: 10 | """ 11 | observation = cv2.cvtColor(cv2.resize(observation, (84, 110)), cv2.COLOR_BGR2GRAY) 12 | observation = observation[26:110,:] 13 | ret, observation = cv2.threshold(observation,1,255,cv2.THRESH_BINARY) 14 | x = np.reshape(observation,(84,84,1)) 15 | return x.transpose((2, 0, 1)) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code for 《白话强化学习与Pytorch》 (Reinforcement Learning and PyTorch in Plain Language) 2 | 3 | ## 5.2 Q-Learning: 4 | 5 | native-Qlearning.py 6 | 7 | ## 6 Deep Learning: 8 | 9 | feedforward_neural_network/ 10 | 11 | convolutional_neural_network/ 12 | 13 | recurrent_neural_network/ 14 | 15 | ## 8.1 NIPS DQN: 16 | 17 | nips-DQN.py 18 | 19 | ## 8.2 Nature DQN: 20 | 21 | nature-DQN.py 22 | 23 | ## 8.3 Double DQN: 24 | 25 | double-DQN.py 26 | 27 | ## 8.4 Dueling DQN: 28 | 29 | dueling-DQN.py 30 | 31 | ## 9.4 DDPG: 32 | 33 | ddpg.py 34 | 35 | ## 10.1.2 A3C DDPG: 36 | 37 | A3C-DDPG.py 38 | 39 | ## Contact the Author 
40 | 41 | ### Email: zhenbinye@gmail.com, 77232517@qq.com 42 | 43 | ### The code is updated continuously; if you have suggestions for improvement or questions, please contact the author 44 | 45 | ### Due to version changes, the code may differ slightly from what is printed in the book 46 | -------------------------------------------------------------------------------- /convolutional_neural_network/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | import torchvision.transforms as transforms 5 | 6 | 7 | # Device configuration 8 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 9 | 10 | # Hyper parameters 11 | num_epochs = 5 12 | num_classes = 10 13 | batch_size = 100 14 | learning_rate = 0.001 15 | 16 | # MNIST dataset 17 | train_dataset = torchvision.datasets.MNIST(root='../../data/', 18 | train=True, 19 | transform=transforms.ToTensor(), 20 | download=True) 21 | 22 | test_dataset = torchvision.datasets.MNIST(root='../../data/', 23 | train=False, 24 | transform=transforms.ToTensor()) 25 | 26 | # Data loader 27 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 28 | batch_size=batch_size, 29 | shuffle=True) 30 | 31 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 32 | batch_size=batch_size, 33 | shuffle=False) 34 | 35 | # Convolutional neural network (two convolutional layers) 36 | class ConvNet(nn.Module): 37 | def __init__(self, num_classes=10): 38 | super(ConvNet, self).__init__() 39 | self.layer1 = nn.Sequential( 40 | nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2), 41 | nn.BatchNorm2d(16), 42 | nn.ReLU(), 43 | nn.MaxPool2d(kernel_size=2, stride=2)) 44 | self.layer2 = nn.Sequential( 45 | nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2), 46 | nn.BatchNorm2d(32), 47 | nn.ReLU(), 48 | nn.MaxPool2d(kernel_size=2, stride=2)) 49 | self.fc = nn.Linear(7*7*32, num_classes) 50 | 51 | def forward(self, x): 52 | out = self.layer1(x) 53 | out = self.layer2(out) 54 | out = out.reshape(out.size(0), -1) 55 | out = self.fc(out) 56 | return out 57 | 58 | model = ConvNet(num_classes).to(device) 59 | 60 | # Loss and optimizer 61 | criterion = nn.CrossEntropyLoss() 62 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 63 | 64 | # Train the model 65 | total_step = len(train_loader) 66 | for epoch in range(num_epochs): 67 | for i, (images, labels) in enumerate(train_loader): 68 | images = images.to(device) 69 | labels = labels.to(device) 70 | 71 | # Forward pass 72 | outputs = model(images) 73 | loss = criterion(outputs, labels) 74 | 75 | # Backward and optimize 76 | optimizer.zero_grad() 77 | loss.backward() 78 | optimizer.step() 79 | 80 | if (i+1) % 100 == 0: 81 | print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 82 | .format(epoch+1, num_epochs, i+1, total_step, loss.item())) 83 | 84 | # Test the model 85 | model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance) 86 | with torch.no_grad(): 87 | correct = 0 88 | total = 0 89 | for images, labels in test_loader: 90 | images = images.to(device) 91 | labels = labels.to(device) 92 | outputs = model(images) 93 | _, predicted = torch.max(outputs.data, 1) 94 | total += labels.size(0) 95 | correct += (predicted == labels).sum().item() 96 | 97 | print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 98 | 99 | # Save the model checkpoint 100 | torch.save(model.state_dict(), 'model.ckpt') 101 | -------------------------------------------------------------------------------- /ddpg.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch, gym, argparse 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | 7 | 8 | class ReplayBuffer(object): 9 | def __init__(self, max_size=1e6): 10 | self.storage = [] 11 | self.max_size = max_size 12 | self.ptr = 0 13 | 14 | def add(self, data): 15 | if len(self.storage) == self.max_size: 16 | self.storage[int(self.ptr)] = data 17 | self.ptr = (self.ptr + 1) % self.max_size 18 | else: 19 | self.storage.append(data) 20 | 21 | def sample(self, batch_size): 22 | ind = np.random.randint(0, len(self.storage), size=batch_size) 23 | x, y, u, r, d = [], [], [], [], [] 24 | for i in ind: 25 | X, Y, U, R, D = self.storage[i] 26 | x.append(np.array(X, copy=False)) 27 | y.append(np.array(Y, copy=False)) 28 | u.append(np.array(U, copy=False)) 29 | r.append(np.array(R, copy=False)) 30 | d.append(np.array(D, copy=False)) 31 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1) 32 | 33 | 34 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 35 | 36 | 37 | class Actor(nn.Module): 38 | def __init__(self, state_dim, action_dim, max_action): 39 | super(Actor, self).__init__() 40 | self.l1 = nn.Linear(state_dim, 400) 41 | self.l2 = nn.Linear(400, 300) 42 | self.l3 = nn.Linear(300, action_dim) 43 | self.max_action = max_action 44 | 45 | def forward(self, x): 46 | x = F.relu(self.l1(x)) 47 | x = F.relu(self.l2(x)) 48 | x = self.max_action * torch.tanh(self.l3(x)) 49 | return x 50 | 51 | 52 | class Critic(nn.Module): 53 | def __init__(self, state_dim, action_dim): 54 | super(Critic, self).__init__() 55 | self.l1 = nn.Linear(state_dim, 400) 56 | self.l2 = nn.Linear(400 + action_dim, 300) 57 | self.l3 = nn.Linear(300, 1) 58 | 59 | def forward(self, x, u): 60 | x = F.relu(self.l1(x)) 61 | x = F.relu(self.l2(torch.cat([x, u], 1))) 62 | x = self.l3(x) 63 | return x 64 | 65 | 66 | class DDPG(object): 67 | def __init__(self, state_dim, action_dim, max_action): 68 | self.actor = Actor(state_dim, action_dim, max_action).to(device) 69 | self.actor_target = Actor(state_dim, action_dim, max_action).to(device) 70 | self.actor_target.load_state_dict(self.actor.state_dict()) 71 | self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=1e-4) 72 | self.critic = Critic(state_dim, action_dim).to(device) 73 | self.critic_target = Critic(state_dim, action_dim).to(device) 74 | self.critic_target.load_state_dict(self.critic.state_dict()) 75 | self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), weight_decay=1e-2) 76 | 77 | def select_action(self, state): 78 | state = torch.FloatTensor(state.reshape(1, -1)).to(device) 79 | return self.actor(state).cpu().data.numpy().flatten() 80 | 81 | def train(self, replay_buffer, iterations, batch_size=64, discount=0.99, tau=0.001): 82 | 83 | for _ in range(iterations): 84 | x, y, u, r, d = replay_buffer.sample(batch_size) 85 | state = torch.FloatTensor(x).to(device) 86 | action = torch.FloatTensor(u).to(device) 87 | next_state = torch.FloatTensor(y).to(device) 88 | done = torch.FloatTensor(1 - d).to(device) 89 | reward = torch.FloatTensor(r).to(device) 90 | 91 | target_Q = self.critic_target(next_state, self.actor_target(next_state)) 92 | target_Q = reward + (done * discount * target_Q).detach() 93 | current_Q = self.critic(state, action) 94 | 95 | critic_loss = F.mse_loss(current_Q, target_Q) 96 | self.critic_optimizer.zero_grad() 97 | 
critic_loss.backward() 98 | self.critic_optimizer.step() 99 | 100 | actor_loss = -self.critic(state, self.actor(state)).mean() 101 | self.actor_optimizer.zero_grad() 102 | actor_loss.backward() 103 | self.actor_optimizer.step() 104 | 105 | # Update model 106 | for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): 107 | target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) 108 | for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): 109 | target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) 110 | 111 | 112 | if __name__ == "__main__": 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument("--env_name", default="Pendulum-v0") 115 | parser.add_argument("--seed", default=0, type=int, help='Sets Gym, PyTorch and Numpy seeds') 116 | parser.add_argument("--start_timesteps", default=1e4, type=int, help='how many step random policy run') 117 | parser.add_argument("--max_timesteps", default=1e6, type=float, help='max_timesteps') 118 | parser.add_argument("--expl_noise", default=0.1, type=float, help='Gaussian exploration') 119 | parser.add_argument("--batch_size", default=100, type=int, help='Batch size') 120 | parser.add_argument("--GAMMA", default=0.99, type=float, help='Discount') 121 | parser.add_argument("--tau", default=0.005, type=float, help='DDPG update rate') 122 | parser.add_argument("--policy_noise", default=0.2, type=float, help='Noise to target policy during critic update') 123 | parser.add_argument("--noise_clip", default=0.5, type=float, help='Range to clip target policy noise') 124 | parser.add_argument("--policy_freq", default=2, type=int, help=' Frequency of delayed policy updates') 125 | args = parser.parse_args() 126 | 127 | env = gym.make(args.env_name) 128 | env.seed(args.seed) 129 | torch.manual_seed(args.seed) 130 | np.random.seed(args.seed) 131 | state_dim = env.observation_space.shape[0] 132 | action_dim = env.action_space.shape[0] 133 | max_action = float(env.action_space.high[0]) 134 | policy = DDPG(state_dim, action_dim, max_action) 135 | replay_buffer = ReplayBuffer() 136 | total_timesteps = 0 137 | timesteps_since_eval = 0 138 | episode_num = 0 139 | episode_reward = 0 140 | episode_timesteps = 0 141 | done = True 142 | 143 | while total_timesteps < args.max_timesteps: 144 | if done: 145 | if total_timesteps != 0: 146 | print(("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (total_timesteps, episode_num, episode_timesteps, episode_reward)) 147 | policy.train(replay_buffer, episode_timesteps, args.batch_size, args.GAMMA, args.tau) 148 | 149 | obs = env.reset() 150 | done = False 151 | episode_reward = 0 152 | episode_timesteps = 0 153 | episode_num += 1 154 | 155 | if total_timesteps < args.start_timesteps: 156 | action = env.action_space.sample() 157 | else: 158 | action = policy.select_action(np.array(obs)) 159 | if args.expl_noise != 0: 160 | action = (action + np.random.normal(0, args.expl_noise, size=env.action_space.shape[0])).clip( 161 | env.action_space.low, env.action_space.high) 162 | 163 | new_obs, reward, done, _ = env.step(action) 164 | done_bool = 0 if episode_timesteps + 1 == env._max_episode_steps else float(done) 165 | episode_reward += reward 166 | 167 | replay_buffer.add((obs, new_obs, action, reward, done_bool)) 168 | obs = new_obs 169 | episode_timesteps += 1 170 | total_timesteps += 1 171 | timesteps_since_eval += 1 -------------------------------------------------------------------------------- /double-DQN.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from collections import deque 4 | import numpy as np 5 | import gym 6 | import random 7 | from net import AtariNet 8 | from util import preprocess 9 | 10 | BATCH_SIZE = 32 11 | LR = 0.001 12 | START_EPSILON = 1.0 13 | FINAL_EPSILON = 0.1 14 | EPSILON = START_EPSILON 15 | EXPLORE = 1000000 16 | GAMMA = 0.99 17 | TOTAL_EPISODES = 10000000 18 | MEMORY_SIZE = 1000000 19 | MEMORY_THRESHOLD = 100000 20 | UPDATE_TIME = 10000 21 | TEST_FREQUENCY = 1000 22 | env = gym.make('Pong-v0') 23 | env = env.unwrapped 24 | ACTIONS_SIZE = env.action_space.n 25 | 26 | 27 | class Agent(object): 28 | def __init__(self): 29 | self.network, self.target_network = AtariNet(ACTIONS_SIZE), AtariNet(ACTIONS_SIZE) 30 | self.memory = deque() 31 | self.learning_count = 0 32 | self.optimizer = torch.optim.Adam(self.network.parameters(), lr=LR) 33 | self.loss_func = nn.MSELoss() 34 | 35 | def action(self, state, israndom): 36 | if israndom and random.random() < EPSILON: 37 | return np.random.randint(0, ACTIONS_SIZE) 38 | state = torch.unsqueeze(torch.FloatTensor(state), 0) 39 | actions_value = self.network.forward(state) 40 | return torch.max(actions_value, 1)[1].data.numpy()[0] 41 | 42 | def learn(self, state, action, reward, next_state, done): 43 | if done: 44 | self.memory.append((state, action, reward, next_state, 0)) 45 | else: 46 | self.memory.append((state, action, reward, next_state, 1)) 47 | if len(self.memory) > MEMORY_SIZE: 48 | self.memory.popleft() 49 | if len(self.memory) < MEMORY_THRESHOLD: 50 | return 51 | 52 | if self.learning_count % UPDATE_TIME == 0: 53 | self.target_network.load_state_dict(self.network.state_dict()) 54 | self.learning_count += 1 55 | 56 | batch = random.sample(self.memory, BATCH_SIZE) 57 | state = torch.FloatTensor([x[0] for x in batch]) 58 | action = torch.LongTensor([[x[1]] for x in batch]) 59 | reward = torch.FloatTensor([[x[2]] for x in batch]) 60 | next_state = torch.FloatTensor([x[3] for x in batch]) 61 | done = torch.FloatTensor([[x[4]] for x in batch]) 62 | 63 | actions_value = self.network.forward(next_state) 64 | next_action = torch.unsqueeze(torch.max(actions_value, 1)[1], 1) 65 | eval_q = self.network.forward(state).gather(1, action) 66 | next_q = self.target_network.forward(next_state).gather(1, next_action) 67 | target_q = reward + GAMMA * next_q * done 68 | loss = self.loss_func(eval_q, target_q) 69 | 70 | self.optimizer.zero_grad() 71 | loss.backward() 72 | self.optimizer.step() 73 | 74 | 75 | agent = Agent() 76 | 77 | for i_episode in range(TOTAL_EPISODES): 78 | state = env.reset() 79 | state = preprocess(state) 80 | while True: 81 | # env.render() 82 | action = agent.action(state, True) 83 | next_state, reward, done, info = env.step(action) 84 | next_state = preprocess(next_state) 85 | agent.learn(state, action, reward, next_state, done) 86 | 87 | state = next_state 88 | if done: 89 | break 90 | if EPSILON > FINAL_EPSILON: 91 | EPSILON -= (START_EPSILON - FINAL_EPSILON) / EXPLORE 92 | 93 | # TEST 94 | if i_episode % TEST_FREQUENCY == 0: 95 | state = env.reset() 96 | state = preprocess(state) 97 | total_reward = 0 98 | while True: 99 | # env.render() 100 | action = agent.action(state, israndom=False) 101 | next_state, reward, done, info = env.step(action) 102 | next_state = preprocess(next_state) 103 | 104 | total_reward += reward 105 | 106 | state = next_state 107 | if done: 108 | break 109 | print('episode: {} , total_reward: 
{}'.format(i_episode, round(total_reward, 3))) 110 | 111 | env.close() 112 | -------------------------------------------------------------------------------- /dueling-DQN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from collections import deque 4 | import numpy as np 5 | import gym 6 | import random 7 | import cv2 8 | 9 | BATCH_SIZE = 32 10 | LR = 0.001 11 | START_EPSILON = 1.0 12 | FINAL_EPSILON = 0.1 13 | EPSILON = START_EPSILON 14 | EXPLORE = 1000000 15 | GAMMA = 0.99 16 | TOTAL_EPISODES = 10000000 17 | MEMORY_SIZE = 1000000 18 | MEMORY_THRESHOLD = 100000 19 | TEST_FREQUENCY = 1000 20 | env = gym.make('Pong-v0') 21 | env = env.unwrapped 22 | ACTIONS_SIZE = env.action_space.n 23 | 24 | 25 | def preprocess(observation): 26 | observation = cv2.cvtColor(cv2.resize(observation, (84, 110)), cv2.COLOR_BGR2GRAY) 27 | observation = observation[26:110,:] 28 | ret, observation = cv2.threshold(observation,1,255,cv2.THRESH_BINARY) 29 | x = np.reshape(observation,(84,84,1)) 30 | return x.transpose((2, 0, 1)) 31 | 32 | 33 | class DuelingNet(nn.Module): 34 | 35 | def __init__(self, num_actions): 36 | super(DuelingNet, self).__init__() 37 | self.num_actions = num_actions 38 | self.conv1 = nn.Sequential( 39 | nn.Conv2d(1, 32, kernel_size=8, stride=4), 40 | nn.ReLU() 41 | ) 42 | self.conv2 = nn.Sequential( 43 | nn.Conv2d(32, 64, kernel_size=4, stride=2), 44 | nn.ReLU() 45 | ) 46 | self.conv3 = nn.Sequential( 47 | nn.Conv2d(64, 64, kernel_size=3, stride=1), 48 | nn.ReLU() 49 | ) 50 | self.hidden_adv = nn.Sequential( 51 | nn.Linear(64 * 7 * 7, 512, bias=True), 52 | nn.ReLU() 53 | ) 54 | self.hidden_val = nn.Sequential( 55 | nn.Linear(64 * 7 * 7, 512, bias=True), 56 | nn.ReLU() 57 | ) 58 | self.adv = nn.Sequential( 59 | nn.Linear(512, num_actions, bias=True) 60 | ) 61 | self.val = nn.Sequential( 62 | nn.Linear(512, 1, bias=True) 63 | ) 64 | self.apply(self.init_weights) 65 | 66 | def init_weights(self, m): 67 | if type(m) == nn.Conv2d: 68 | m.weight.data.normal_(0.0, 0.02) 69 | if type(m) == nn.Linear: 70 | torch.nn.init.xavier_uniform_(m.weight) 71 | m.bias.data.fill_(0.01) 72 | 73 | def forward(self, x): 74 | x = self.conv1(x) 75 | x = self.conv2(x) 76 | x = self.conv3(x) 77 | x = x.view(x.size(0), -1) 78 | adv = self.hidden_adv(x) 79 | val = self.hidden_val(x) 80 | 81 | adv = self.adv(adv) 82 | val = self.val(val).expand(x.size(0), self.num_actions) 83 | 84 | x = val + adv - adv.mean(1).unsqueeze(1).expand(x.size(0), self.num_actions) 85 | return x 86 | 87 | class Agent(object): 88 | def __init__(self): 89 | self.network = DuelingNet(ACTIONS_SIZE) 90 | self.memory = deque() 91 | self.optimizer = torch.optim.Adam(self.network.parameters(), lr=LR) 92 | self.loss_func = nn.MSELoss() 93 | 94 | def action(self, state, israndom): 95 | if israndom and random.random() < EPSILON: 96 | return np.random.randint(0, ACTIONS_SIZE) 97 | state = torch.unsqueeze(torch.FloatTensor(state), 0) 98 | actions_value = self.network.forward(state) 99 | return torch.max(actions_value, 1)[1].data.numpy()[0] 100 | 101 | def learn(self, state, action, reward, next_state, done): 102 | if done: 103 | self.memory.append((state, action, reward, next_state, 0)) 104 | else: 105 | self.memory.append((state, action, reward, next_state, 1)) 106 | if len(self.memory) > MEMORY_SIZE: 107 | self.memory.popleft() 108 | if len(self.memory) < MEMORY_THRESHOLD: 109 | return 110 | 111 | batch = random.sample(self.memory, BATCH_SIZE) 112 | state = 
torch.FloatTensor([x[0] for x in batch]) 113 | action = torch.LongTensor([[x[1]] for x in batch]) 114 | reward = torch.FloatTensor([[x[2]] for x in batch]) 115 | next_state = torch.FloatTensor([x[3] for x in batch]) 116 | done = torch.FloatTensor([[x[4]] for x in batch]) 117 | 118 | eval_q = self.network.forward(state).gather(1, action) 119 | next_q = self.network(next_state).detach() 120 | target_q = reward + GAMMA * next_q.max(1)[0].view(BATCH_SIZE, 1) * done 121 | loss = self.loss_func(eval_q, target_q) 122 | 123 | self.optimizer.zero_grad() 124 | loss.backward() 125 | self.optimizer.step() 126 | 127 | 128 | agent = Agent() 129 | 130 | for i_episode in range(TOTAL_EPISODES): 131 | state = env.reset() 132 | state = preprocess(state) 133 | while True: 134 | # env.render() 135 | action = agent.action(state, True) 136 | next_state, reward, done, info = env.step(action) 137 | next_state = preprocess(next_state) 138 | agent.learn(state, action, reward, next_state, done) 139 | 140 | state = next_state 141 | if done: 142 | break 143 | if EPSILON > FINAL_EPSILON: 144 | EPSILON -= (START_EPSILON - FINAL_EPSILON) / EXPLORE 145 | 146 | # TEST 147 | if i_episode % TEST_FREQUENCY == 0: 148 | state = env.reset() 149 | state = preprocess(state) 150 | total_reward = 0 151 | while True: 152 | # env.render() 153 | action = agent.action(state, israndom=False) 154 | next_state, reward, done, info = env.step(action) 155 | next_state = preprocess(next_state) 156 | 157 | total_reward += reward 158 | 159 | state = next_state 160 | if done: 161 | break 162 | print('episode: {} , total_reward: {}'.format(i_episode, round(total_reward, 3))) 163 | 164 | env.close() 165 | -------------------------------------------------------------------------------- /feedforward_neural_network/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | import torchvision.transforms as transforms 5 | 6 | 7 | # Device configuration 8 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 9 | 10 | # Hyper-parameters 11 | input_size = 784 12 | hidden_size = 500 13 | num_classes = 10 14 | num_epochs = 5 15 | batch_size = 100 16 | learning_rate = 0.001 17 | 18 | # MNIST dataset 19 | train_dataset = torchvision.datasets.MNIST(root='../../data', 20 | train=True, 21 | transform=transforms.ToTensor(), 22 | download=True) 23 | 24 | test_dataset = torchvision.datasets.MNIST(root='../../data', 25 | train=False, 26 | transform=transforms.ToTensor()) 27 | 28 | # Data loader 29 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 30 | batch_size=batch_size, 31 | shuffle=True) 32 | 33 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 34 | batch_size=batch_size, 35 | shuffle=False) 36 | 37 | # Fully connected neural network with one hidden layer 38 | class NeuralNet(nn.Module): 39 | def __init__(self, input_size, hidden_size, num_classes): 40 | super(NeuralNet, self).__init__() 41 | self.fc1 = nn.Linear(input_size, hidden_size) 42 | self.relu = nn.ReLU() 43 | self.fc2 = nn.Linear(hidden_size, num_classes) 44 | 45 | def forward(self, x): 46 | out = self.fc1(x) 47 | out = self.relu(out) 48 | out = self.fc2(out) 49 | return out 50 | 51 | model = NeuralNet(input_size, hidden_size, num_classes).to(device) 52 | 53 | # Loss and optimizer 54 | criterion = nn.CrossEntropyLoss() 55 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 56 | 57 | # Train the model 58 | total_step = len(train_loader) 
59 | for epoch in range(num_epochs): 60 | for i, (images, labels) in enumerate(train_loader): 61 | # Move tensors to the configured device 62 | images = images.reshape(-1, 28*28).to(device) 63 | labels = labels.to(device) 64 | 65 | # Forward pass 66 | outputs = model(images) 67 | loss = criterion(outputs, labels) 68 | 69 | # Backward and optimize 70 | optimizer.zero_grad() 71 | loss.backward() 72 | optimizer.step() 73 | 74 | if (i+1) % 100 == 0: 75 | print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 76 | .format(epoch+1, num_epochs, i+1, total_step, loss.item())) 77 | 78 | # Test the model 79 | # In test phase, we don't need to compute gradients (for memory efficiency) 80 | with torch.no_grad(): 81 | correct = 0 82 | total = 0 83 | for images, labels in test_loader: 84 | images = images.reshape(-1, 28*28).to(device) 85 | labels = labels.to(device) 86 | outputs = model(images) 87 | _, predicted = torch.max(outputs.data, 1) 88 | total += labels.size(0) 89 | correct += (predicted == labels).sum().item() 90 | 91 | print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total)) 92 | 93 | # Save the model checkpoint 94 | torch.save(model.state_dict(), 'model.ckpt') 95 | -------------------------------------------------------------------------------- /ga/bag.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import random 3 | #背包问题 4 | #物品质量价格 5 | X = { 6 | 1: [10, 15], 7 | 2: [15, 25], 8 | 3: [20, 35], 9 | 4: [25, 45], 10 | 5: [30, 55], 11 | 6: [35, 70]} 12 | 13 | #终止界限 14 | FINISHED_LIMIT = 5 15 | 16 | #重量界限 17 | WEIGHT_LIMIT = 80 18 | 19 | #染色体长度 20 | CHROMOSOME_SIZE = 6 21 | 22 | #遴选次数 23 | SELECT_NUMBER = 4 24 | 25 | max_last = 0 26 | diff_last = 10000 27 | 28 | #判断退出 29 | def is_finished(fitnesses): 30 | global max_last 31 | global diff_last 32 | 33 | max_current = 0 34 | for v in fitnesses: 35 | if v[1] > max_current: 36 | max_current = v[1] 37 | 38 | diff = max_current - max_last 39 | if diff < FINISHED_LIMIT and diff_last < FINISHED_LIMIT: 40 | return True 41 | else: 42 | diff_last = diff 43 | max_last = max_current 44 | return False 45 | 46 | #初始染色体样态 47 | def init(): 48 | chromosome_state1 = '100100' 49 | chromosome_state2 = '101010' 50 | chromosome_state3 = '010101' 51 | chromosome_state4 = '101011' 52 | chromosome_states = [chromosome_state1, 53 | chromosome_state2, 54 | chromosome_state3, 55 | chromosome_state4] 56 | return chromosome_states 57 | 58 | 59 | #计算适应度 60 | def fitness(chromosome_states): 61 | fitnesses = [] 62 | for chromosome_state in chromosome_states: 63 | value_sum = 0 64 | weight_sum = 0 65 | for i, v in enumerate(chromosome_state): 66 | if int(v) == 1: 67 | weight_sum += X[i + 1][0] 68 | value_sum += X[i + 1][1] 69 | fitnesses.append([value_sum, weight_sum]) 70 | return fitnesses 71 | 72 | 73 | #筛选 74 | def filter(chromosome_states, fitnesses): 75 | #重量大于80的被淘汰 76 | index = len(fitnesses) - 1 77 | while index >= 0: 78 | index -= 1 79 | if fitnesses[index][1] > WEIGHT_LIMIT: 80 | chromosome_states.pop(index) 81 | fitnesses.pop(index) 82 | 83 | #遴选 84 | selected_index = [0] * len(chromosome_states) 85 | for i in range(SELECT_NUMBER): 86 | j = chromosome_states.index(random.choice(chromosome_states)) 87 | selected_index[j] += 1 88 | return selected_index 89 | 90 | 91 | #产生下一代 92 | def crossover(chromosome_states, selected_index): 93 | chromosome_states_new = [] 94 | index = len(chromosome_states) - 1 95 | while index >= 0: 96 | index -= 1 97 | chromosome_state = 
chromosome_states.pop(index) 98 | for i in range(selected_index[index]): 99 | chromosome_state_x = random.choice(chromosome_states) 100 | pos = random.choice(range(1, CHROMOSOME_SIZE - 1)) 101 | chromosome_states_new.append(chromosome_state[:pos] + chromosome_state_x[pos:]) 102 | chromosome_states.insert(index, chromosome_state) 103 | return chromosome_states_new 104 | 105 | 106 | if __name__ == '__main__': 107 | #初始群体 108 | chromosome_states = init() 109 | n = 100 110 | while n > 0: 111 | n -= 1 112 | #适应度计算 113 | fitnesses = fitness(chromosome_states) 114 | if is_finished(fitnesses): 115 | break 116 | print('1:', fitnesses) 117 | #遴选 118 | selected_index = filter(chromosome_states, fitnesses) 119 | print('2:', selected_index) 120 | #产生下一代 121 | chromosome_states = crossover(chromosome_states, selected_index) 122 | # print '3:', chromosome_states 123 | 124 | fitnesses = fitness(chromosome_states) 125 | print(chromosome_states) 126 | 127 | # 1: [[60, 35], [105, 60], [140, 75], [175, 95]] 128 | # 2: [1, 1, 2] 129 | 130 | # 1: [[60, 35], [105, 60], [80, 45], [90, 50]] 131 | # 2: [2, 1, 0, 1] 132 | 133 | # 1: [[95, 55], [115, 65], [70, 40], [90, 50]] 134 | # 2: [2, 0, 2, 0] 135 | 136 | # 1: [[70, 40], [70, 40], [150, 85], [115, 65]] 137 | # 2: [3, 0, 1] 138 | 139 | # 1: [[115, 65], [115, 65], [115, 65], [70, 40]] 140 | # 2: [2, 0, 0, 2] 141 | # ['100110', '100110', '100110', '100110'] 142 | -------------------------------------------------------------------------------- /ga/peak.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import random 3 | import math 4 | import numpy as np 5 | 6 | 7 | #极大值问题 8 | #染色体 基因X 基因Y 9 | X = [ 10 | [1, '000000100101001', '101010101010101'], 11 | [2, '011000100101100', '001100110011001'], 12 | [3, '001000100100101', '101010101010101'], 13 | [4, '000110100100100', '110011001100110'], 14 | [5, '100000100100101', '101010101010101'], 15 | [6, '101000100100100', '111100001111000'], 16 | [7, '101010100110100', '101010101010101'], 17 | [8, '100110101101000', '000011110000111']] 18 | 19 | 20 | #染色体长度 21 | CHROMOSOME_SIZE = 15 22 | 23 | 24 | #判断退出 25 | def is_finished(last_three): 26 | s = sorted(last_three) 27 | if s[0] and s[2] - s[0] < 0.01 * s[0]: 28 | return True 29 | else: 30 | return False 31 | 32 | #初始染色体样态 33 | def init(): 34 | chromosome_state1 = ['000000100101001', '101010101010101'] 35 | chromosome_state2 = ['011000100101100', '001100110011001'] 36 | chromosome_state3 = ['001000100100101', '101010101010101'] 37 | chromosome_state4 = ['000110100100100', '110011001100110'] 38 | chromosome_state5 = ['100000100100101', '101010101010101'] 39 | chromosome_state6 = ['101000100100100', '111100001111000'] 40 | chromosome_state7 = ['101010100110100', '101010101010101'] 41 | chromosome_state8 = ['100110101101000', '000011110000111'] 42 | chromosome_states = [chromosome_state1, 43 | chromosome_state2, 44 | chromosome_state3, 45 | chromosome_state4, 46 | chromosome_state5, 47 | chromosome_state6, 48 | chromosome_state7, 49 | chromosome_state8] 50 | return chromosome_states 51 | 52 | 53 | #计算适应度 54 | def fitness(chromosome_states): 55 | fitnesses = [] 56 | for chromosome_state in chromosome_states: 57 | if chromosome_state[0][0] == '1': 58 | x = 10 * (-float(int(chromosome_state[0][1:], 2) - 1)/16384) 59 | else: 60 | x = 10 * (float(int(chromosome_state[0], 2) + 1)/16384) 61 | if chromosome_state[1][0] == '1': 62 | y = 10 * (-float(int(chromosome_state[1][1:], 2) - 1)/16384) 63 | else: 64 | y = 10 * 
(float(int(chromosome_state[1], 2) + 1)/16384) 65 | z = y * math.sin(x) + x * math.cos(y) 66 | print(x, y, z) 67 | fitnesses.append(z) 68 | 69 | return fitnesses 70 | 71 | 72 | #筛选 73 | def filter(chromosome_states, fitnesses): 74 | #top 8 对应的索引值 75 | chromosome_states_new = [] 76 | top1_fitness_index = 0 77 | for i in np.argsort(fitnesses)[::-1][:8].tolist(): 78 | chromosome_states_new.append(chromosome_states[i]) 79 | top1_fitness_index = i 80 | return chromosome_states_new, top1_fitness_index 81 | 82 | 83 | #产生下一代 84 | def crossover(chromosome_states): 85 | chromosome_states_new = [] 86 | while chromosome_states: 87 | chromosome_state = chromosome_states.pop(0) 88 | for v in chromosome_states: 89 | pos = random.choice(range(8, CHROMOSOME_SIZE - 1)) 90 | chromosome_states_new.append([chromosome_state[0][:pos] + v[0][pos:], chromosome_state[1][:pos] + v[1][pos:]]) 91 | chromosome_states_new.append([v[0][:pos] + chromosome_state[1][pos:], v[0][:pos] + chromosome_state[1][pos:]]) 92 | return chromosome_states_new 93 | 94 | 95 | #基因突变 96 | def mutation(chromosome_states): 97 | n = int(5.0 / 100 * len(chromosome_states)) 98 | while n > 0: 99 | n -= 1 100 | chromosome_state = random.choice(chromosome_states) 101 | index = chromosome_states.index(chromosome_state) 102 | pos = random.choice(range(len(chromosome_state))) 103 | x = chromosome_state[0][:pos] + str(int(not int(chromosome_state[0][pos]))) + chromosome_state[0][pos+1:] 104 | y = chromosome_state[1][:pos] + str(int(not int(chromosome_state[1][pos]))) + chromosome_state[1][pos+1:] 105 | chromosome_states[index] = [x, y] 106 | 107 | 108 | if __name__ == '__main__': 109 | chromosome_states = init() 110 | last_three = [0] * 3 111 | last_num = 0 112 | n = 100 113 | while n > 0: 114 | n -= 1 115 | chromosome_states = crossover(chromosome_states) 116 | mutation(chromosome_states) 117 | fitnesses = fitness(chromosome_states) 118 | chromosome_states, top1_fitness_index = filter(chromosome_states, fitnesses) 119 | print('---------%d-----------' % n) 120 | print(chromosome_states) 121 | last_three[last_num] = fitnesses[top1_fitness_index] 122 | print(fitnesses[top1_fitness_index]) 123 | if is_finished(last_three): 124 | break 125 | if last_num >= 2: 126 | last_num = 0 127 | else: 128 | last_num += 1 129 | 130 | 131 | # ['100100', '101010', '010101', '101011'] 132 | 133 | # 1: [[60, 35], [105, 60], [140, 75], [175, 95]] 134 | # 2: [0, 2, 2] 135 | # 136 | # 1: [[60, 35], [60, 35], [80, 45], [125, 70]] 137 | # 2: [3, 0, 1, 0] 138 | # 139 | # 1: [[80, 45], [60, 35], [60, 35], [140, 80]] 140 | # 2: [1, 2, 0, 1] 141 | # 142 | # 1: [[70, 40], [70, 40], [70, 40], [85, 50]] 143 | # 2: [3, 0, 0, 1] 144 | # 145 | # 1: [[70, 40], [70, 40], [70, 40], [95, 55]] 146 | # 2: [4, 0, 0, 0] 147 | # 148 | # 1: [[70, 40], [70, 40], [70, 40], [70, 40]] 149 | # 2: [4, 0, 0, 0] 150 | # 151 | # ['100010', '100010', '100010', '100010'] 152 | # [[70, 40], [70, 40], [70, 40], [70, 40]] 153 | -------------------------------------------------------------------------------- /gym_sample/demo.py: -------------------------------------------------------------------------------- 1 | import gym 2 | env = gym.make('CartPole-v0') 3 | env.reset() 4 | for _ in range(1000): 5 | env.render() 6 | env.step(env.action_space.sample()) # take a random action 7 | -------------------------------------------------------------------------------- /native-Qlearning.py: -------------------------------------------------------------------------------- 1 | # 
Q-learning implementation of the path-finding problem from http://mnemstudio.org/path-finding-q-learning-tutorial.htm 2 | import numpy as np 3 | import random 4 | import matplotlib.pyplot as plt 5 | 6 | Q_fun = np.zeros((6, 6)) 7 | 8 | # Reward matrix: the reward for taking an action in state and moving to next_state; rows index state, columns index next_state 9 | reward = np.array([[-1, -1, -1, -1, 0, -1], 10 | [-1, -1, -1, 0, -1, 100], 11 | [-1, -1, -1, 0, -1, -1], 12 | [-1, 0, 0, -1, 0, -1], 13 | [0, -1, -1, 0, -1, 100], 14 | [-1, 0, -1, -1, 0, 100]]) 15 | 16 | legal_action = [[4], 17 | [3, 5], 18 | [3], 19 | [1, 2, 4], 20 | [0, 3, 5], 21 | [1, 4, 5]] 22 | 23 | GAMMA = 0.5 24 | TRAINING_STEP = 100 25 | LAYOUT = 221 26 | 27 | for i in range(1, TRAINING_STEP + 1): 28 | state = random.randint(0, 4) 29 | # 100% exploration: choose next_state at random 30 | next_state = random.choice(legal_action[state]) 31 | Q_fun[state, next_state] = reward[state, next_state] + GAMMA * Q_fun[next_state].max() 32 | 33 | if i % (TRAINING_STEP/4) == 0: 34 | plt.subplot(LAYOUT) 35 | plt.imshow(Q_fun, cmap='gray_r') 36 | LAYOUT += 1 37 | print(Q_fun) 38 | plt.show() 39 | 40 | 41 | -------------------------------------------------------------------------------- /nature-DQN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from collections import deque 4 | import numpy as np 5 | import gym 6 | import random 7 | from net import AtariNet 8 | from util import preprocess 9 | 10 | BATCH_SIZE = 32 11 | LR = 0.001 12 | START_EPSILON = 1.0 13 | FINAL_EPSILON = 0.1 14 | EPSILON = START_EPSILON 15 | EXPLORE = 1000000 16 | GAMMA = 0.99 17 | TOTAL_EPISODES = 10000000 18 | MEMORY_SIZE = 1000000 19 | MEMORY_THRESHOLD = 100000 20 | UPDATE_TIME = 10000 21 | TEST_FREQUENCY = 1000 22 | env = gym.make('Pong-v0') 23 | env = env.unwrapped 24 | ACTIONS_SIZE = env.action_space.n 25 | 26 | 27 | class Agent(object): 28 | def __init__(self): 29 | self.network, self.target_network = AtariNet(ACTIONS_SIZE), AtariNet(ACTIONS_SIZE) 30 | self.memory = deque() 31 | self.learning_count = 0 32 | self.optimizer = torch.optim.Adam(self.network.parameters(), lr=LR) 33 | self.loss_func = nn.MSELoss() 34 | 35 | def action(self, state, israndom): 36 | if israndom and random.random() < EPSILON: 37 | return np.random.randint(0, ACTIONS_SIZE) 38 | state = torch.unsqueeze(torch.FloatTensor(state), 0) 39 | actions_value = self.network.forward(state) 40 | return torch.max(actions_value, 1)[1].data.numpy()[0] 41 | 42 | def learn(self, state, action, reward, next_state, done): 43 | if done: 44 | self.memory.append((state, action, reward, next_state, 0)) 45 | else: 46 | self.memory.append((state, action, reward, next_state, 1)) 47 | if len(self.memory) > MEMORY_SIZE: 48 | self.memory.popleft() 49 | if len(self.memory) < MEMORY_THRESHOLD: 50 | return 51 | 52 | if self.learning_count % UPDATE_TIME == 0: 53 | self.target_network.load_state_dict(self.network.state_dict()) 54 | self.learning_count += 1 55 | 56 | batch = random.sample(self.memory, BATCH_SIZE) 57 | state = torch.FloatTensor([x[0] for x in batch]) 58 | action = torch.LongTensor([[x[1]] for x in batch]) 59 | reward = torch.FloatTensor([[x[2]] for x in batch]) 60 | next_state = torch.FloatTensor([x[3] for x in batch]) 61 | done = torch.FloatTensor([[x[4]] for x in batch]) 62 | 63 | eval_q = self.network.forward(state).gather(1, action) 64 | next_q = self.target_network(next_state).detach() 65 | target_q = reward + GAMMA * next_q.max(1)[0].view(BATCH_SIZE, 1) * done 66 | loss = self.loss_func(eval_q, target_q) 67 | 68 | self.optimizer.zero_grad() 69 | 
69 |         loss.backward()
70 |         self.optimizer.step()
71 | 
72 | 
73 | agent = Agent()
74 | 
75 | for i_episode in range(TOTAL_EPISODES):
76 |     state = env.reset()
77 |     state = preprocess(state)
78 |     while True:
79 |         # env.render()
80 |         action = agent.action(state, True)
81 |         next_state, reward, done, info = env.step(action)
82 |         next_state = preprocess(next_state)
83 |         agent.learn(state, action, reward, next_state, done)
84 | 
85 |         state = next_state
86 |         if done:
87 |             break
88 |     if EPSILON > FINAL_EPSILON:
89 |         EPSILON -= (START_EPSILON - FINAL_EPSILON) / EXPLORE
90 | 
91 |     # TEST
92 |     if i_episode % TEST_FREQUENCY == 0:
93 |         state = env.reset()
94 |         state = preprocess(state)
95 |         total_reward = 0
96 |         while True:
97 |             # env.render()
98 |             action = agent.action(state, israndom=False)
99 |             next_state, reward, done, info = env.step(action)
100 |             next_state = preprocess(next_state)
101 | 
102 |             total_reward += reward
103 | 
104 |             state = next_state
105 |             if done:
106 |                 break
107 |         print('episode: {} , total_reward: {}'.format(i_episode, round(total_reward, 3)))
108 | 
109 | env.close()
110 | 
-------------------------------------------------------------------------------- /neat/Digraph.gv: --------------------------------------------------------------------------------
1 | digraph {
2 |     node [fontsize=9 height=0.2 shape=circle width=0.2]
3 |     In0 [fillcolor=lightgray shape=box style=filled]
4 |     In1 [fillcolor=lightgray shape=box style=filled]
5 |     In3 [fillcolor=lightgray shape=box style=filled]
6 |     In4 [fillcolor=lightgray shape=box style=filled]
7 |     act1 [fillcolor=lightblue style=filled]
8 |     act2 [fillcolor=lightblue style=filled]
9 |     137 [fillcolor=white style=filled]
10 |     714 [fillcolor=white style=filled]
11 |     626 [fillcolor=white style=filled]
12 |     404 [fillcolor=white style=filled]
13 |     246 [fillcolor=white style=filled]
14 |     442 [fillcolor=white style=filled]
15 |     540 [fillcolor=white style=filled]
16 |     In0 -> act2 [color=red penwidth=0.15464140610078286 style=dotted]
17 |     In1 -> act1 [color=green penwidth=0.20589896434649435 style=dotted]
18 |     In1 -> act2 [color=green penwidth=0.32201856748289415 style=solid]
19 |     In3 -> act1 [color=red penwidth=0.41018376786556177 style=dotted]
20 |     In3 -> act2 [color=green penwidth=1.049253720927758 style=solid]
21 |     In4 -> act1 [color=red penwidth=0.17560360127452074 style=dotted]
22 |     In4 -> act2 [color=green penwidth=0.4084716161702079 style=solid]
23 |     137 -> act1 [color=red penwidth=0.17428254929845405 style=dotted]
24 |     In0 -> 246 [color=green penwidth=0.4724790110694175 style=dotted]
25 |     In0 -> 404 [color=green penwidth=0.34381289726102127 style=dotted]
26 |     137 -> 442 [color=green penwidth=0.4006046348289356 style=solid]
27 |     442 -> act1 [color=green penwidth=0.5911627385006601 style=solid]
28 |     In0 -> 540 [color=red penwidth=0.4238873029754118 style=solid]
29 |     540 -> 404 [color=green penwidth=0.4927242488863397 style=solid]
30 |     In0 -> 626 [color=green penwidth=0.37161877250004893 style=solid]
31 |     626 -> 404 [color=green penwidth=0.1753711398635109 style=solid]
32 |     137 -> 714 [color=green penwidth=0.24745341023372266 style=solid]
33 |     714 -> act1 [color=red penwidth=0.3848767903179091 style=solid]
34 |     246 -> 714 [color=red penwidth=0.28561121470609185 style=solid]
35 | }
36 | 
-------------------------------------------------------------------------------- /neat/Digraph.gv.svg: --------------------------------------------------------------------------------
(Graphviz SVG rendering of the winner network defined in Digraph.gv above — input nodes In0, In1, In3, In4 and output nodes act1, act2; the SVG markup carries no additional information and is omitted here.)
-------------------------------------------------------------------------------- /neat/avg_fitness.svg: --------------------------------------------------------------------------------
(SVG plot of the population's average and best fitness per generation, produced by visualize.plot_stats; markup and raw coordinate data omitted.)
-------------------------------------------------------------------------------- /neat/cartpole.py: --------------------------------------------------------------------------------
1 | import neat
2 | import sys
3 | import numpy as np
4 | import gym
5 | import visualize
6 | 
7 | GAME = 'CartPole-v0'
8 | env = gym.make(GAME).unwrapped
9 | 
10 | CONFIG = "./config"
11 | EP_STEP = 300
12 | GENERATION_EP = 10
13 | CHECKPOINT = 9
14 | 
15 | def eval_genomes(genomes, config):
16 |     for genome_id, genome in genomes:
17 |         net = neat.nn.FeedForwardNetwork.create(genome, config)
18 |         ep_r = []
19 |         for ep in range(GENERATION_EP):
20 |             accumulative_r = 0
21 |             observation = env.reset()
22 |             for t in range(EP_STEP):
23 |                 action_values = net.activate(observation)
24 |                 action = np.argmax(action_values)
25 |                 observation_, reward, done, _ = env.step(action)
26 |                 accumulative_r += reward
27 |                 if done:
28 |                     break
29 |                 observation = observation_
30 |             ep_r.append(accumulative_r)
31 |         genome.fitness = np.min(ep_r)/float(EP_STEP)
32 | 
33 | def run():
34 |     config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
35 |                          neat.DefaultSpeciesSet, neat.DefaultStagnation, CONFIG)
36 |     pop = neat.Population(config)
37 | 
38 |     # record training history
39 |     stats = neat.StatisticsReporter()
40 |     pop.add_reporter(stats)
41 |     pop.add_reporter(neat.StdOutReporter(True))
42 |     pop.add_reporter(neat.Checkpointer(5))
43 | 
44 |     pop.run(eval_genomes, 10)
45 | 
46 |     # visualize training
47 |     visualize.plot_stats(stats, ylog=False, view=True)
48 |     visualize.plot_species(stats, view=True)
49 | 
50 | def evaluation():
51 |     p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-%i' % CHECKPOINT)
52 |     winner = p.run(eval_genomes, 1)
53 | 
54 |     # show winner net
55 |     node_names = {-1: 'In0', -2: 'In1', -3: 'In3', -4: 'In4', 0: 'act1', 1: 'act2'}
56 |     visualize.draw_net(p.config, winner, True, node_names=node_names)
57 | 
58 |     net = neat.nn.FeedForwardNetwork.create(winner, p.config)
59 |     while True:
60 |         s = env.reset()
61 |         while True:
62 |             env.render()
63 |             a = np.argmax(net.activate(s))
64 |             s, r, done, _ = env.step(a)
65 |             if done: break
66 | 
67 | if __name__ == '__main__':
68 |     TRAINING = sys.argv[1]
69 | 
70 |     if TRAINING == 'TRAIN':
71 |         run()
72 |     elif TRAINING == 'EVAL':
73 |         evaluation()
74 |     else:
75 |         print('Please indicate TRAIN or EVAL')
76 | 
-------------------------------------------------------------------------------- /neat/config: --------------------------------------------------------------------------------
1 | # neat-python configuration for the CartPole-v0 environment on OpenAI Gym
2 | 
3 | [NEAT]
4 | pop_size              = 100
5 | # Note: the fitness threshold will never be reached because
6 | # we are controlling the termination ourselves based on simulation performance.
7 | fitness_criterion     = max
8 | fitness_threshold     = 2.
9 | reset_on_extinction   = 0
10 | 
11 | [DefaultGenome]
12 | # node activation options
13 | activation_default      = relu
14 | activation_mutate_rate  = 0.0
15 | activation_options      = relu
16 | 
17 | # node aggregation options
18 | aggregation_default     = sum
19 | aggregation_mutate_rate = 0.0
20 | aggregation_options     = sum
21 | 
22 | # node bias options
23 | bias_init_mean          = 0.0
24 | bias_init_stdev         = 1.0
25 | bias_max_value          = 30.0
26 | bias_min_value          = -30.0
27 | bias_mutate_power       = 0.5
28 | bias_mutate_rate        = 0.7
29 | bias_replace_rate       = 0.1
30 | 
31 | # genome compatibility options
32 | compatibility_disjoint_coefficient = 1.0
33 | compatibility_weight_coefficient   = 1.0
34 | 
35 | # connection add/remove rates
36 | conn_add_prob           = 0.9
37 | conn_delete_prob        = 0.2
38 | 
39 | # connection enable options
40 | enabled_default         = True
41 | enabled_mutate_rate     = 0.01
42 | 
43 | feed_forward            = True
44 | initial_connection      = full
45 | # options (unconnected, fs_neat, full)
46 | 
47 | # node add/remove rates
48 | node_add_prob           = 0.9
49 | node_delete_prob        = 0.2
50 | 
51 | # network parameters
52 | num_hidden              = 0
53 | num_inputs              = 4
54 | num_outputs             = 2
55 | 
56 | # node response options
57 | response_init_mean      = 1.0
58 | response_init_stdev     = 0.0
59 | response_max_value      = 30.0
60 | response_min_value      = -30.0
61 | response_mutate_power   = 0.0
62 | response_mutate_rate    = 0.0
63 | response_replace_rate   = 0.0
64 | 
65 | # connection weight options
66 | weight_init_mean        = 0.0
67 | weight_init_stdev       = 1.0
68 | weight_max_value        = 30.
69 | weight_min_value        = -30.
70 | weight_mutate_power     = 0.5
71 | weight_mutate_rate      = 0.8
72 | weight_replace_rate     = 0.1
73 | 
74 | [DefaultSpeciesSet]
75 | compatibility_threshold = 3.0
76 | 
77 | [DefaultStagnation]
78 | species_fitness_func = max
79 | max_stagnation       = 20
80 | species_elitism      = 4
81 | 
82 | [DefaultReproduction]
83 | elitism            = 2
84 | survival_threshold = 0.2
-------------------------------------------------------------------------------- /neat/speciation.svg: --------------------------------------------------------------------------------
(SVG stack plot of species sizes per generation, produced by visualize.plot_species; markup and raw coordinate data omitted.)
-------------------------------------------------------------------------------- /neat/visualize.py: --------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import copy
4 | import warnings
5 | 
6 | import graphviz
7 | import matplotlib.pyplot as plt
8 | import numpy as np
9 | 
10 | 
11 | def plot_stats(statistics, ylog=False, view=False, filename='avg_fitness.svg'):
12 |     """ Plots the population's average and best fitness. """
13 |     if plt is None:
14 |         warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
15 |         return
16 | 
17 |     generation = range(len(statistics.most_fit_genomes))
18 |     best_fitness = [c.fitness for c in statistics.most_fit_genomes]
19 |     avg_fitness = np.array(statistics.get_fitness_mean())
20 |     stdev_fitness = np.array(statistics.get_fitness_stdev())
21 | 
22 |     plt.plot(generation, avg_fitness, 'b-', label="average")
23 |     #plt.plot(generation, avg_fitness - stdev_fitness, 'g-.', label="-1 sd")
24 |     plt.plot(generation, avg_fitness + stdev_fitness, 'g-.', label="+1 sd")
25 |     plt.plot(generation, best_fitness, 'r-', label="best")
26 | 
27 |     plt.title("Population's average and best fitness")
28 |     plt.xlabel("Generations")
29 |     plt.ylabel("Fitness")
30 |     plt.grid()
31 |     plt.legend(loc="best")
32 |     if ylog:
33 |         plt.gca().set_yscale('symlog')
34 | 
35 |     plt.savefig(filename)
36 |     if view:
37 |         plt.show()
38 | 
39 |     plt.close()
40 | 
41 | 
42 | def plot_species(statistics, view=False, filename='speciation.svg'):
43 |     """ Visualizes speciation throughout evolution. """
44 |     if plt is None:
45 |         warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
46 |         return
47 | 
48 |     species_sizes = statistics.get_species_sizes()
49 |     num_generations = len(species_sizes)
50 |     curves = np.array(species_sizes).T
51 | 
52 |     fig, ax = plt.subplots()
53 |     ax.stackplot(range(num_generations), *curves)
54 | 
55 |     plt.title("Speciation")
56 |     plt.ylabel("Size per Species")
57 |     plt.xlabel("Generations")
58 | 
59 |     plt.savefig(filename)
60 | 
61 |     if view:
62 |         plt.show()
63 | 
64 |     plt.close()
65 | 
66 | 
67 | def draw_net(config, genome, view=False, filename=None, node_names=None, show_disabled=True, prune_unused=False,
68 |              node_colors=None, fmt='svg'):
69 |     """ Receives a genome and draws a neural network with arbitrary topology. """
70 |     # Attributes for network nodes.
71 | if graphviz is None: 72 | warnings.warn("This display is not available due to a missing optional dependency (graphviz)") 73 | return 74 | 75 | if node_names is None: 76 | node_names = {} 77 | 78 | assert type(node_names) is dict 79 | 80 | if node_colors is None: 81 | node_colors = {} 82 | 83 | assert type(node_colors) is dict 84 | 85 | node_attrs = { 86 | 'shape': 'circle', 87 | 'fontsize': '9', 88 | 'height': '0.2', 89 | 'width': '0.2'} 90 | 91 | dot = graphviz.Digraph(format=fmt, node_attr=node_attrs) 92 | 93 | inputs = set() 94 | for k in config.genome_config.input_keys: 95 | inputs.add(k) 96 | name = node_names.get(k, str(k)) 97 | input_attrs = {'style': 'filled', 98 | 'shape': 'box'} 99 | input_attrs['fillcolor'] = node_colors.get(k, 'lightgray') 100 | dot.node(name, _attributes=input_attrs) 101 | 102 | outputs = set() 103 | for k in config.genome_config.output_keys: 104 | outputs.add(k) 105 | name = node_names.get(k, str(k)) 106 | node_attrs = {'style': 'filled'} 107 | node_attrs['fillcolor'] = node_colors.get(k, 'lightblue') 108 | 109 | dot.node(name, _attributes=node_attrs) 110 | 111 | if prune_unused: 112 | connections = set() 113 | for cg in genome.connections.values(): 114 | if cg.enabled or show_disabled: 115 | connections.add(cg.key) 116 | 117 | used_nodes = copy.copy(outputs) 118 | pending = copy.copy(outputs) 119 | while pending: 120 | #print(pending, used_nodes) 121 | new_pending = set() 122 | for a, b in connections: 123 | if b in pending and a not in used_nodes: 124 | new_pending.add(a) 125 | used_nodes.add(a) 126 | pending = new_pending 127 | else: 128 | used_nodes = set(genome.nodes.keys()) 129 | 130 | for n in used_nodes: 131 | if n in inputs or n in outputs: 132 | continue 133 | 134 | attrs = {'style': 'filled'} 135 | attrs['fillcolor'] = node_colors.get(n, 'white') 136 | dot.node(str(n), _attributes=attrs) 137 | 138 | for cg in genome.connections.values(): 139 | if cg.enabled or show_disabled: 140 | #if cg.input not in used_nodes or cg.output not in used_nodes: 141 | # continue 142 | input, output = cg.key 143 | a = node_names.get(input, str(input)) 144 | b = node_names.get(output, str(output)) 145 | style = 'solid' if cg.enabled else 'dotted' 146 | color = 'green' if cg.weight > 0 else 'red' 147 | width = str(0.1 + abs(cg.weight / 5.0)) 148 | dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width}) 149 | 150 | dot.render(filename, view=view) 151 | 152 | return dot -------------------------------------------------------------------------------- /net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class AtariNet(nn.Module): 6 | 7 | def __init__(self, num_actions): 8 | super(AtariNet, self).__init__() 9 | self.conv1 = nn.Sequential( 10 | nn.Conv2d(1, 32, kernel_size=8, stride=4), 11 | nn.ReLU() 12 | ) 13 | self.conv2 = nn.Sequential( 14 | nn.Conv2d(32, 64, kernel_size=4, stride=2), 15 | nn.ReLU() 16 | ) 17 | self.conv3 = nn.Sequential( 18 | nn.Conv2d(64, 64, kernel_size=3, stride=1), 19 | nn.ReLU() 20 | ) 21 | self.hidden = nn.Sequential( 22 | nn.Linear(64 * 7 * 7, 512, bias=True), 23 | nn.ReLU() 24 | ) 25 | self.out = nn.Sequential( 26 | nn.Linear(512, num_actions, bias=True) 27 | ) 28 | self.apply(self.init_weights) 29 | 30 | def init_weights(self, m): 31 | if type(m) == nn.Conv2d: 32 | m.weight.data.normal_(0.0, 0.02) 33 | if type(m) == nn.Linear: 34 | torch.nn.init.xavier_uniform_(m.weight) 35 | m.bias.data.fill_(0.01) 36 | 37 | def forward(self, 
x): 38 | x = self.conv1(x) 39 | x = self.conv2(x) 40 | x = self.conv3(x) 41 | x = x.view(x.size(0), -1) 42 | x = self.hidden(x) 43 | x = self.out(x) 44 | return x 45 | 46 | 47 | class CnnDQN(nn.Module): 48 | def __init__(self, inputs_shape, num_actions): 49 | super(CnnDQN, self).__init__() 50 | 51 | self.inut_shape = inputs_shape 52 | self.num_actions = num_actions 53 | 54 | self.features = nn.Sequential( 55 | nn.Conv2d(inputs_shape[0], 32, kernel_size=8, stride=4), 56 | nn.ReLU(), 57 | nn.Conv2d(32, 64, kernel_size=4, stride=2), 58 | nn.ReLU(), 59 | nn.Conv2d(64, 64, kernel_size=3, stride=1), 60 | nn.ReLU() 61 | ) 62 | 63 | self.fc = nn.Sequential( 64 | nn.Linear(self.features_size(), 512), 65 | nn.ReLU(), 66 | nn.Linear(512, self.num_actions) 67 | ) 68 | 69 | def forward(self, x): 70 | x = self.features(x) 71 | x = x.view(x.size(0), -1) 72 | x = self.fc(x) 73 | return x 74 | 75 | def features_size(self): 76 | return self.features(torch.zeros(1, *self.inut_shape)).view(1, -1).size(1) 77 | -------------------------------------------------------------------------------- /nips-DQN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from collections import deque 4 | import numpy as np 5 | import gym 6 | import random 7 | from net import AtariNet 8 | from util import preprocess 9 | 10 | BATCH_SIZE = 32 11 | LR = 0.001 12 | START_EPSILON = 1.0 13 | FINAL_EPSILON = 0.1 14 | EPSILON = START_EPSILON 15 | EXPLORE = 1000000 16 | GAMMA = 0.99 17 | TOTAL_EPISODES = 10000000 18 | MEMORY_SIZE = 1000000 19 | MEMORY_THRESHOLD = 100000 20 | TEST_FREQUENCY = 1000 21 | env = gym.make('Pong-v0') 22 | env = env.unwrapped 23 | ACTIONS_SIZE = env.action_space.n 24 | 25 | 26 | class Agent(object): 27 | def __init__(self): 28 | self.network = AtariNet(ACTIONS_SIZE) 29 | self.memory = deque() 30 | self.optimizer = torch.optim.Adam(self.network.parameters(), lr=LR) 31 | self.loss_func = nn.MSELoss() 32 | 33 | def action(self, state, israndom): 34 | if israndom and random.random() < EPSILON: 35 | return np.random.randint(0, ACTIONS_SIZE) 36 | state = torch.unsqueeze(torch.FloatTensor(state), 0) 37 | actions_value = self.network.forward(state) 38 | return torch.max(actions_value, 1)[1].data.numpy()[0] 39 | 40 | def learn(self, state, action, reward, next_state, done): 41 | if done: 42 | self.memory.append((state, action, reward, next_state, 0)) 43 | else: 44 | self.memory.append((state, action, reward, next_state, 1)) 45 | if len(self.memory) > MEMORY_SIZE: 46 | self.memory.popleft() 47 | if len(self.memory) < MEMORY_THRESHOLD: 48 | return 49 | 50 | batch = random.sample(self.memory, BATCH_SIZE) 51 | state = torch.FloatTensor([x[0] for x in batch]) 52 | action = torch.LongTensor([[x[1]] for x in batch]) 53 | reward = torch.FloatTensor([[x[2]] for x in batch]) 54 | next_state = torch.FloatTensor([x[3] for x in batch]) 55 | done = torch.FloatTensor([[x[4]] for x in batch]) 56 | 57 | eval_q = self.network.forward(state).gather(1, action) 58 | next_q = self.network(next_state).detach() 59 | target_q = reward + GAMMA * next_q.max(1)[0].view(BATCH_SIZE, 1) * done 60 | loss = self.loss_func(eval_q, target_q) 61 | 62 | self.optimizer.zero_grad() 63 | loss.backward() 64 | self.optimizer.step() 65 | 66 | 67 | agent = Agent() 68 | 69 | for i_episode in range(TOTAL_EPISODES): 70 | state = env.reset() 71 | state = preprocess(state) 72 | while True: 73 | # env.render() 74 | action = agent.action(state, True) 75 | next_state, reward, done, info = 
env.step(action) 76 | next_state = preprocess(next_state) 77 | agent.learn(state, action, reward, next_state, done) 78 | 79 | state = next_state 80 | if done: 81 | break 82 | if EPSILON > FINAL_EPSILON: 83 | EPSILON -= (START_EPSILON - FINAL_EPSILON) / EXPLORE 84 | 85 | # TEST 86 | if i_episode % TEST_FREQUENCY == 0: 87 | state = env.reset() 88 | state = preprocess(state) 89 | total_reward = 0 90 | while True: 91 | # env.render() 92 | action = agent.action(state, israndom=False) 93 | next_state, reward, done, info = env.step(action) 94 | next_state = preprocess(next_state) 95 | 96 | total_reward += reward 97 | 98 | state = next_state 99 | if done: 100 | break 101 | print('episode: {} , total_reward: {}'.format(i_episode, round(total_reward, 3))) 102 | 103 | env.close() 104 | -------------------------------------------------------------------------------- /recurrent_neural_network/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | import torchvision.transforms as transforms 5 | 6 | 7 | # Device configuration 8 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 9 | 10 | # Hyper-parameters 11 | sequence_length = 28 12 | input_size = 28 13 | hidden_size = 128 14 | num_layers = 2 15 | num_classes = 10 16 | batch_size = 100 17 | num_epochs = 2 18 | learning_rate = 0.01 19 | 20 | # MNIST dataset 21 | train_dataset = torchvision.datasets.MNIST(root='../../data/', 22 | train=True, 23 | transform=transforms.ToTensor(), 24 | download=True) 25 | 26 | test_dataset = torchvision.datasets.MNIST(root='../../data/', 27 | train=False, 28 | transform=transforms.ToTensor()) 29 | 30 | # Data loader 31 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 32 | batch_size=batch_size, 33 | shuffle=True) 34 | 35 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 36 | batch_size=batch_size, 37 | shuffle=False) 38 | 39 | # Recurrent neural network (many-to-one) 40 | class RNN(nn.Module): 41 | def __init__(self, input_size, hidden_size, num_layers, num_classes): 42 | super(RNN, self).__init__() 43 | self.hidden_size = hidden_size 44 | self.num_layers = num_layers 45 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) 46 | self.fc = nn.Linear(hidden_size, num_classes) 47 | 48 | def forward(self, x): 49 | # Set initial hidden and cell states 50 | h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 51 | c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 52 | 53 | # Forward propagate LSTM 54 | out, _ = self.lstm(x, (h0, c0)) # out: tensor of shape (batch_size, seq_length, hidden_size) 55 | 56 | # Decode the hidden state of the last time step 57 | out = self.fc(out[:, -1, :]) 58 | return out 59 | 60 | model = RNN(input_size, hidden_size, num_layers, num_classes).to(device) 61 | 62 | 63 | # Loss and optimizer 64 | criterion = nn.CrossEntropyLoss() 65 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 66 | 67 | # Train the model 68 | total_step = len(train_loader) 69 | for epoch in range(num_epochs): 70 | for i, (images, labels) in enumerate(train_loader): 71 | images = images.reshape(-1, sequence_length, input_size).to(device) 72 | labels = labels.to(device) 73 | 74 | # Forward pass 75 | outputs = model(images) 76 | loss = criterion(outputs, labels) 77 | 78 | # Backward and optimize 79 | optimizer.zero_grad() 80 | loss.backward() 81 | optimizer.step() 82 | 83 | if (i+1) % 100 == 
0: 84 | print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 85 | .format(epoch+1, num_epochs, i+1, total_step, loss.item())) 86 | 87 | # Test the model 88 | with torch.no_grad(): 89 | correct = 0 90 | total = 0 91 | for images, labels in test_loader: 92 | images = images.reshape(-1, sequence_length, input_size).to(device) 93 | labels = labels.to(device) 94 | outputs = model(images) 95 | _, predicted = torch.max(outputs.data, 1) 96 | total += labels.size(0) 97 | correct += (predicted == labels).sum().item() 98 | 99 | print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 100 | 101 | # Save the model checkpoint 102 | torch.save(model.state_dict(), 'model.ckpt') 103 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def preprocess(observation): 6 | """ 7 | image preprocess 8 | :param observation: 9 | :return: 10 | """ 11 | observation = cv2.cvtColor(cv2.resize(observation, (84, 110)), cv2.COLOR_BGR2GRAY) 12 | observation = observation[26:110,:] 13 | ret, observation = cv2.threshold(observation,1,255,cv2.THRESH_BINARY) 14 | x = np.reshape(observation,(84,84,1)) 15 | return x.transpose((2, 0, 1)) --------------------------------------------------------------------------------