├── .gitattributes ├── .gitignore ├── OpenAI ├── BipedalWalker-v2 │ ├── BipedalWalker-v2.html │ ├── BipedalWalker-v2.ipynb │ ├── Model.py │ ├── README.md │ ├── ReplayBuffer.py │ └── preTrained │ │ ├── TD3_BipedalWalker-v2_0_solved_actor.pth │ │ ├── TD3_BipedalWalker-v2_0_solved_actor_target.pth │ │ ├── TD3_BipedalWalker-v2_0_solved_critic_1_target.pth │ │ ├── TD3_BipedalWalker-v2_0_solved_critic_2_target.pth │ │ ├── TD3_BipedalWalker-v2_0_solved_crtic_1.pth │ │ └── TD3_BipedalWalker-v2_0_solved_crtic_2.pth ├── BipedalWalker-v3 │ ├── Agent.py │ ├── BipedalWalker-v3.html │ ├── BipedalWalker-v3.ipynb │ ├── README.md │ ├── ReplayBuffer.py │ └── preTrained │ │ ├── TD3_BipedalWalker-v3_0_solved_actor.pth │ │ ├── TD3_BipedalWalker-v3_0_solved_actor_target.pth │ │ ├── TD3_BipedalWalker-v3_0_solved_critic_1_target.pth │ │ ├── TD3_BipedalWalker-v3_0_solved_critic_2_target.pth │ │ ├── TD3_BipedalWalker-v3_0_solved_crtic_1.pth │ │ └── TD3_BipedalWalker-v3_0_solved_crtic_2.pth ├── CartPole-v0 │ ├── .gitignore │ ├── CartPole-v0.ipynb │ ├── README.md │ ├── agents │ │ ├── DDQN.py │ │ └── __init__.py │ ├── assets │ │ ├── cartpole-v0.jpg │ │ └── game_reward.png │ ├── memory.py │ ├── model.h5 │ └── requirements.txt ├── HumanoidPyBulletEnv-v0 │ ├── HumanoidPyBulletEnv-v0.ipynb │ ├── README.md │ ├── multiprocessing_env.py │ └── pretrained │ │ ├── actor_HumanoidPyBulletEnv-v0_checkpoint.pt │ │ ├── actor_HumanoidPyBulletEnv-v0_final.pt │ │ ├── critic_HumanoidPyBulletEnv-v0_checkpoint.pt │ │ └── critic_HumanoidPyBulletEnv-v0_final.pt ├── LunarLander-v2 │ ├── LunarLanderContinuous-v2 (DDPG).html │ ├── LunarLanderContinuous-v2 (DDPG).ipynb │ ├── README.md │ ├── checkpoint_actor.pth │ ├── checkpoint_critic.pth │ ├── ddpg_agent.py │ └── model.py ├── MountainCarContinuous-v0 │ ├── Agent.py │ ├── Model.py │ ├── MountainCarContinuous-v0 (DDPG).html │ ├── MountainCarContinuous-v0 (DDPG).ipynb │ ├── Noise.py │ ├── README.md │ ├── checkpoint_actor.pth │ └── checkpoint_critic.pth ├── Taxi-v2 │ ├── README.md │ ├── agent.py │ ├── main.py │ └── monitor.py └── Taxi-v3 │ ├── Reinforcement Learning.ppsx │ └── Taxi-v3.ipynb ├── README.md └── Unity-ML └── Soccer ├── Agent.py ├── Model.py ├── Noise.py ├── Soccer.ipynb ├── Soccer_Windows_x86_64 ├── .DS_Store └── Soccer_Data │ ├── MonoBleedingEdge │ └── etc │ │ └── mono │ │ ├── 2.0 │ │ ├── Browsers │ │ │ └── Compat.browser │ │ ├── DefaultWsdlHelpGenerator.aspx │ │ ├── machine.config │ │ ├── settings.map │ │ └── web.config │ │ ├── 4.0 │ │ ├── Browsers │ │ │ └── Compat.browser │ │ ├── DefaultWsdlHelpGenerator.aspx │ │ ├── machine.config │ │ ├── settings.map │ │ └── web.config │ │ ├── 4.5 │ │ ├── Browsers │ │ │ └── Compat.browser │ │ ├── DefaultWsdlHelpGenerator.aspx │ │ ├── machine.config │ │ ├── settings.map │ │ └── web.config │ │ ├── browscap.ini │ │ ├── config │ │ └── mconfig │ │ └── config.xml │ ├── Resources │ ├── unity default resources │ └── unity_builtin_extra │ ├── app.info │ ├── boot.config │ ├── globalgamemanagers │ ├── globalgamemanagers.assets │ ├── level0 │ ├── resources.assets │ ├── sharedassets0.assets │ └── sharedassets0.assets.resS ├── checkpoint_goalie_actor.pth ├── checkpoint_goalie_critic.pth ├── checkpoint_striker_actor.pth └── checkpoint_striker_critic.pth /.gitattributes: -------------------------------------------------------------------------------- 1 | Unity-ML/Soccer/Soccer_Windows_x86_64/* linguist-vendored 2 | *.html linguist-vendored 3 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # IntelliJ 107 | .idea/ -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/Model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | 6 | # Hyperparameters 7 | gamma = 0.99 # discount for future rewards 8 | batch_size = 100 # num of transitions sampled from replay buffer 9 | polyak = 0.995 # target policy update parameter (1-tau) 10 | policy_noise = 0.2 # target policy smoothing noise 11 | noise_clip = 0.5 12 | policy_delay = 2 # delayed policy updates parameter 13 | 14 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 15 | 16 | class Actor(nn.Module): 17 | def __init__(self, state_dim, action_dim, max_action): 18 | super(Actor, self).__init__() 19 | 20 | self.l1 = nn.Linear(state_dim, 400) 21 | self.l2 = nn.Linear(400, 300) 22 | self.l3 = nn.Linear(300, action_dim) 23 | 24 | self.max_action = max_action 25 | 26 | def forward(self, state): 27 | a = F.relu(self.l1(state)) 28 | a = F.relu(self.l2(a)) 29 | a = torch.tanh(self.l3(a)) * self.max_action 30 | return a 31 | 32 | class Critic(nn.Module): 33 | def __init__(self, state_dim, action_dim): 34 | super(Critic, self).__init__() 35 | 36 | self.l1 = nn.Linear(state_dim + action_dim, 400) 37 | self.l2 = nn.Linear(400, 300) 38 | self.l3 = nn.Linear(300, 1) 39 | 40 | def forward(self, state, action): 41 | state_action = torch.cat([state, action], 1) 42 | 43 | q = F.relu(self.l1(state_action)) 44 | q = F.relu(self.l2(q)) 45 | q = self.l3(q) 46 | return q 47 | 48 | class TD3: 49 | def __init__(self, state_dim, action_dim, max_action): 50 | 51 | self.actor = 
Actor(state_dim, action_dim, max_action).to(device) 52 | self.actor_target = Actor(state_dim, action_dim, max_action).to(device) 53 | self.actor_target.load_state_dict(self.actor.state_dict()) 54 | self.actor_optimizer = optim.Adam(self.actor.parameters()) 55 | 56 | self.critic_1 = Critic(state_dim, action_dim).to(device) 57 | self.critic_1_target = Critic(state_dim, action_dim).to(device) 58 | self.critic_1_target.load_state_dict(self.critic_1.state_dict()) 59 | self.critic_1_optimizer = optim.Adam(self.critic_1.parameters()) 60 | 61 | self.critic_2 = Critic(state_dim, action_dim).to(device) 62 | self.critic_2_target = Critic(state_dim, action_dim).to(device) 63 | self.critic_2_target.load_state_dict(self.critic_2.state_dict()) 64 | self.critic_2_optimizer = optim.Adam(self.critic_2.parameters()) 65 | 66 | self.max_action = max_action 67 | 68 | def select_action(self, state): 69 | state = torch.FloatTensor(state.reshape(1, -1)).to(device) 70 | return self.actor(state).cpu().data.numpy().flatten() 71 | 72 | def update(self, replay_buffer, n_iter): 73 | 74 | for i in range(n_iter): 75 | # Sample a batch of transitions from replay buffer: 76 | state, action_, reward, next_state, done = replay_buffer.sample(batch_size) 77 | state = torch.FloatTensor(state).to(device) 78 | action = torch.FloatTensor(action_).to(device) 79 | reward = torch.FloatTensor(reward).reshape((batch_size,1)).to(device) 80 | next_state = torch.FloatTensor(next_state).to(device) 81 | done = torch.FloatTensor(done).reshape((batch_size,1)).to(device) 82 | 83 | # Select next action according to target policy: 84 | noise = torch.FloatTensor(action_).data.normal_(0, policy_noise).to(device) 85 | noise = noise.clamp(-noise_clip, noise_clip) 86 | next_action = (self.actor_target(next_state) + noise) 87 | next_action = next_action.clamp(-self.max_action, self.max_action) 88 | 89 | # Compute target Q-value: 90 | target_Q1 = self.critic_1_target(next_state, next_action) 91 | target_Q2 = self.critic_2_target(next_state, next_action) 92 | target_Q = torch.min(target_Q1, target_Q2) 93 | target_Q = reward + ((1-done) * gamma * target_Q).detach() 94 | 95 | # Optimize Critic 1: 96 | current_Q1 = self.critic_1(state, action) 97 | loss_Q1 = F.mse_loss(current_Q1, target_Q) 98 | self.critic_1_optimizer.zero_grad() 99 | loss_Q1.backward() 100 | self.critic_1_optimizer.step() 101 | 102 | # Optimize Critic 2: 103 | current_Q2 = self.critic_2(state, action) 104 | loss_Q2 = F.mse_loss(current_Q2, target_Q) 105 | self.critic_2_optimizer.zero_grad() 106 | loss_Q2.backward() 107 | self.critic_2_optimizer.step() 108 | 109 | # Delayed policy updates: 110 | if i % policy_delay == 0: 111 | # Compute actor loss: 112 | actor_loss = -self.critic_1(state, self.actor(state)).mean() 113 | 114 | # Optimize the actor 115 | self.actor_optimizer.zero_grad() 116 | actor_loss.backward() 117 | self.actor_optimizer.step() 118 | 119 | # Polyak averaging update: 120 | for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): 121 | target_param.data.copy_( (polyak * target_param.data) + ((1-polyak) * param.data)) 122 | 123 | for param, target_param in zip(self.critic_1.parameters(), self.critic_1_target.parameters()): 124 | target_param.data.copy_( (polyak * target_param.data) + ((1-polyak) * param.data)) 125 | 126 | for param, target_param in zip(self.critic_2.parameters(), self.critic_2_target.parameters()): 127 | target_param.data.copy_( (polyak * target_param.data) + ((1-polyak) * param.data)) 128 | 129 | 130 | def save(self, 
directory, name): 131 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name)) 132 | torch.save(self.actor_target.state_dict(), '%s/%s_actor_target.pth' % (directory, name)) 133 | 134 | torch.save(self.critic_1.state_dict(), '%s/%s_crtic_1.pth' % (directory, name)) 135 | torch.save(self.critic_1_target.state_dict(), '%s/%s_critic_1_target.pth' % (directory, name)) 136 | 137 | torch.save(self.critic_2.state_dict(), '%s/%s_crtic_2.pth' % (directory, name)) 138 | torch.save(self.critic_2_target.state_dict(), '%s/%s_critic_2_target.pth' % (directory, name)) 139 | 140 | def load(self, directory, name): 141 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location=lambda storage, loc: storage)) 142 | self.actor_target.load_state_dict(torch.load('%s/%s_actor_target.pth' % (directory, name), map_location=lambda storage, loc: storage)) 143 | 144 | self.critic_1.load_state_dict(torch.load('%s/%s_crtic_1.pth' % (directory, name), map_location=lambda storage, loc: storage)) 145 | self.critic_1_target.load_state_dict(torch.load('%s/%s_critic_1_target.pth' % (directory, name), map_location=lambda storage, loc: storage)) 146 | 147 | self.critic_2.load_state_dict(torch.load('%s/%s_crtic_2.pth' % (directory, name), map_location=lambda storage, loc: storage)) 148 | self.critic_2_target.load_state_dict(torch.load('%s/%s_critic_2_target.pth' % (directory, name), map_location=lambda storage, loc: storage)) 149 | 150 | 151 | def load_actor(self, directory, name): 152 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location=lambda storage, loc: storage)) 153 | self.actor_target.load_state_dict(torch.load('%s/%s_actor_target.pth' % (directory, name), map_location=lambda storage, loc: storage)) 154 | -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/README.md: -------------------------------------------------------------------------------- 1 | # BipedalWalker Problem 2 | 3 | ### Getting Started 4 | The environment to the BipedalWalker is described [here](https://github.com/openai/gym/wiki/BipedalWalker-v2). 5 | 6 | ### Solution Video 7 | [![BipedalWalker-v2](http://img.youtube.com/vi/QW6fWP5FDoU/0.jpg)](https://www.youtube.com/watch?v=QW6fWP5FDoU "BipedalWalker-v2") 8 | 9 | The video shows in the first part the behaviour of the untrained agent and then in comparison the behaviour of the trained agent. 10 | 11 | ### Solution Info 12 | My learning algorithm is a [Twin Delayed Deep Deterministic Policy Gradient algorithm (TD3)]([https://arxiv.org/pdf/1802.09477.pdf]). 13 | 14 | ### Instructions 15 | 16 | start Jupyter Notebook `BipedalWalker-v2.ipynb` and follow the instructions. 
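For orientation, the sketch below shows how the `TD3` class from `Model.py` and the `ReplayBuffer` are typically wired together in a training loop. It is a minimal illustration, not the notebook's actual code: the exploration-noise level, the episode/timestep limits, and the older `gym` `reset()`/`step()` API are assumptions.

```python
# Minimal TD3 training-loop sketch using the classes in this folder.
# Hyperparameters here (exploration noise, episode limits) are illustrative only.
import gym
import numpy as np

from Model import TD3
from ReplayBuffer import ReplayBuffer

env = gym.make("BipedalWalker-v2")   # older gym API: reset() -> state, step() -> 4-tuple
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

agent = TD3(state_dim, action_dim, max_action)
buffer = ReplayBuffer()

exploration_noise = 0.1              # assumed value for illustration
max_episodes, max_timesteps = 1000, 2000

for episode in range(1, max_episodes + 1):
    state = env.reset()
    episode_reward = 0
    for t in range(max_timesteps):
        # Deterministic actor output plus Gaussian exploration noise, clipped to the action bounds.
        action = agent.select_action(state)
        action = action + np.random.normal(0, exploration_noise, size=action_dim)
        action = action.clip(env.action_space.low, env.action_space.high)

        next_state, reward, done, _ = env.step(action)
        buffer.add((state, action, reward, next_state, float(done)))

        state = next_state
        episode_reward += reward
        if done:
            break

    # One round of critic/actor updates per environment step taken this episode.
    agent.update(buffer, t)
    print(f"Episode {episode}\treward {episode_reward:.1f}")
```

The checkpoints under `preTrained/` follow exactly the naming scheme produced by `TD3.save()` and consumed by `TD3.load()` (including the `crtic` spelling used in the code).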
17 | -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/ReplayBuffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ReplayBuffer: 4 | def __init__(self): 5 | self.buffer = [] 6 | 7 | def add(self, transition): 8 | self.buffer.append(transition) 9 | 10 | def sample(self, batch_size): 11 | indexes = np.random.randint(0, len(self.buffer), size=batch_size) 12 | state, action, reward, next_state, done = [], [], [], [], [] 13 | 14 | for i in indexes: 15 | s, a, r, s_, d = self.buffer[i] 16 | state.append(np.array(s, copy=False)) 17 | action.append(np.array(a, copy=False)) 18 | reward.append(np.array(r, copy=False)) 19 | next_state.append(np.array(s_, copy=False)) 20 | done.append(np.array(d, copy=False)) 21 | 22 | return np.array(state), np.array(action), np.array(reward), np.array(next_state), np.array(done) 23 | -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_actor.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_actor_target.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_actor_target.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_critic_1_target.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_critic_1_target.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_critic_2_target.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_critic_2_target.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_crtic_1.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_crtic_1.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_crtic_2.pth: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v2/preTrained/TD3_BipedalWalker-v2_0_solved_crtic_2.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/Agent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import numpy as np 6 | 7 | # Hyperparameters 8 | gamma = 0.99 # discount for future rewards 9 | batch_size = 100 # num of transitions sampled from replay buffer 10 | polyak = 0.995 # target policy update parameter (1-tau) 11 | policy_noise = 0.2 # target policy smoothing noise 12 | noise_clip = 0.5 13 | exploration_noise = 0.1 14 | 15 | policy_delay = 2 # delayed policy updates parameter 16 | LR_ACTOR = 0.001 17 | LR_CRITIC = 0.001 18 | 19 | WEIGHT_DECAY = 0.0 20 | 21 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 22 | 23 | class Actor(nn.Module): 24 | def __init__(self, state_dim, action_dim, max_action): 25 | super(Actor, self).__init__() 26 | 27 | self.l1 = nn.Linear(state_dim, 400) 28 | self.l2 = nn.Linear(400, 300) 29 | self.l3 = nn.Linear(300, action_dim) 30 | 31 | self.max_action = max_action 32 | 33 | def forward(self, state): 34 | a = F.relu(self.l1(state)) 35 | a = F.relu(self.l2(a)) 36 | a = torch.tanh(self.l3(a)) * self.max_action 37 | return a 38 | 39 | class Critic(nn.Module): 40 | def __init__(self, state_dim, action_dim): 41 | super(Critic, self).__init__() 42 | 43 | self.l1 = nn.Linear(state_dim + action_dim, 400) 44 | self.l2 = nn.Linear(400, 300) 45 | self.l3 = nn.Linear(300, 1) 46 | 47 | def forward(self, state, action): 48 | state_action = torch.cat([state, action], 1) 49 | 50 | q = F.relu(self.l1(state_action)) 51 | q = F.relu(self.l2(q)) 52 | q = self.l3(q) 53 | return q 54 | 55 | class TD3: 56 | def __init__(self, env): 57 | self.env = env 58 | 59 | state_dim = env.observation_space.shape[0] 60 | action_dim = env.action_space.shape[0] 61 | max_action = float(env.action_space.high[0]) 62 | 63 | self.actor = Actor(state_dim, action_dim, max_action).to(device) 64 | self.actor_target = Actor(state_dim, action_dim, max_action).to(device) 65 | self.actor_target.load_state_dict(self.actor.state_dict()) 66 | self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=LR_ACTOR) 67 | 68 | self.critic_1 = Critic(state_dim, action_dim).to(device) 69 | self.critic_1_target = Critic(state_dim, action_dim).to(device) 70 | self.critic_1_target.load_state_dict(self.critic_1.state_dict()) 71 | self.critic_1_optimizer = optim.Adam(self.critic_1.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY) 72 | 73 | self.critic_2 = Critic(state_dim, action_dim).to(device) 74 | self.critic_2_target = Critic(state_dim, action_dim).to(device) 75 | self.critic_2_target.load_state_dict(self.critic_2.state_dict()) 76 | self.critic_2_optimizer = optim.Adam(self.critic_2.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY) 77 | 78 | self.max_action = max_action 79 | 80 | def select_action(self, state): 81 | state = torch.FloatTensor(state.reshape(1, -1)).to(device) 82 | action = self.actor(state).cpu().data.numpy().flatten() 83 | 84 | if exploration_noise != 0: 85 | action = (action + np.random.normal(0, exploration_noise, size=self.env.action_space.shape[0])) 86 | 87 | return action.clip(self.env.action_space.low, self.env.action_space.high) 88 | 89 | def update(self, 
replay_buffer, n_iter): 90 | for i in range(n_iter): 91 | state, action, reward, next_state, done = replay_buffer.sample(batch_size) 92 | 93 | state = torch.FloatTensor(state).to(device) 94 | action = torch.FloatTensor(action).to(device) 95 | reward = torch.FloatTensor(reward).to(device) 96 | next_state = torch.FloatTensor(next_state).to(device) 97 | done = torch.FloatTensor(done).to(device) 98 | 99 | # Select next action according to target policy: 100 | noise = torch.empty_like(action).data.normal_(0, policy_noise).to(device) 101 | noise = noise.clamp(-noise_clip, noise_clip) 102 | next_action = (self.actor_target(next_state) + noise) 103 | next_action = next_action.clamp(-self.max_action, self.max_action) 104 | 105 | # Compute target Q-value: 106 | target_Q1 = self.critic_1_target(next_state, next_action) 107 | target_Q2 = self.critic_2_target(next_state, next_action) 108 | target_Q = torch.min(target_Q1, target_Q2) 109 | target_Q = reward + ((1-done) * gamma * target_Q).detach() 110 | 111 | # Optimize Critic 1: 112 | current_Q1 = self.critic_1(state, action) 113 | loss_Q1 = F.mse_loss(current_Q1, target_Q) 114 | 115 | self.critic_1_optimizer.zero_grad() 116 | loss_Q1.backward() 117 | self.critic_1_optimizer.step() 118 | 119 | # Optimize Critic 2: 120 | current_Q2 = self.critic_2(state, action) 121 | loss_Q2 = F.mse_loss(current_Q2, target_Q) 122 | 123 | self.critic_2_optimizer.zero_grad() 124 | loss_Q2.backward() 125 | self.critic_2_optimizer.step() 126 | 127 | # Delayed policy updates: 128 | if i % policy_delay == 0: 129 | # Compute actor loss: 130 | actor_loss = -self.critic_1(state, self.actor(state)).mean() 131 | 132 | # Optimize the actor 133 | self.actor_optimizer.zero_grad() 134 | actor_loss.backward() 135 | self.actor_optimizer.step() 136 | 137 | # Polyak averaging update: 138 | for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): 139 | target_param.data.copy_( (polyak * target_param.data) + ((1-polyak) * param.data)) 140 | 141 | for param, target_param in zip(self.critic_1.parameters(), self.critic_1_target.parameters()): 142 | target_param.data.copy_( (polyak * target_param.data) + ((1-polyak) * param.data)) 143 | 144 | for param, target_param in zip(self.critic_2.parameters(), self.critic_2_target.parameters()): 145 | target_param.data.copy_( (polyak * target_param.data) + ((1-polyak) * param.data)) 146 | 147 | 148 | def save(self, directory, name): 149 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, name)) 150 | torch.save(self.actor_target.state_dict(), '%s/%s_actor_target.pth' % (directory, name)) 151 | 152 | torch.save(self.critic_1.state_dict(), '%s/%s_crtic_1.pth' % (directory, name)) 153 | torch.save(self.critic_1_target.state_dict(), '%s/%s_critic_1_target.pth' % (directory, name)) 154 | 155 | torch.save(self.critic_2.state_dict(), '%s/%s_crtic_2.pth' % (directory, name)) 156 | torch.save(self.critic_2_target.state_dict(), '%s/%s_critic_2_target.pth' % (directory, name)) 157 | 158 | def load(self, directory, name): 159 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location=lambda storage, loc: storage)) 160 | self.actor_target.load_state_dict(torch.load('%s/%s_actor_target.pth' % (directory, name), map_location=lambda storage, loc: storage)) 161 | 162 | self.critic_1.load_state_dict(torch.load('%s/%s_crtic_1.pth' % (directory, name), map_location=lambda storage, loc: storage)) 163 | self.critic_1_target.load_state_dict(torch.load('%s/%s_critic_1_target.pth' % (directory, 
name), map_location=lambda storage, loc: storage)) 164 | 165 | self.critic_2.load_state_dict(torch.load('%s/%s_crtic_2.pth' % (directory, name), map_location=lambda storage, loc: storage)) 166 | self.critic_2_target.load_state_dict(torch.load('%s/%s_critic_2_target.pth' % (directory, name), map_location=lambda storage, loc: storage)) 167 | 168 | 169 | def load_actor(self, directory, name): 170 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, name), map_location=lambda storage, loc: storage)) 171 | self.actor_target.load_state_dict(torch.load('%s/%s_actor_target.pth' % (directory, name), map_location=lambda storage, loc: storage)) 172 | -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/README.md: -------------------------------------------------------------------------------- 1 | # BipedalWalker Problem 2 | 3 | ### Getting Started 4 | The environment to the BipedalWalker is described [here](https://github.com/openai/gym/wiki/BipedalWalker-v2). 5 | 6 | ### Solution Video 7 | [![BipedalWalker-v3](http://img.youtube.com/vi/14yGAsIG-Rs/0.jpg)](https://www.youtube.com/watch?v=14yGAsIG-Rs "BipedalWalker-v3") 8 | 9 | The video shows in the first part the behaviour of the untrained agent and then in comparison the behaviour of the trained agent. 10 | 11 | ### Solution Info 12 | My learning algorithm is a [Twin Delayed Deep Deterministic Policy Gradient algorithm (TD3)]([https://arxiv.org/pdf/1802.09477.pdf]). 13 | 14 | ### Instructions 15 | 16 | start Jupyter Notebook `BipedalWalker-v3.ipynb` and follow the instructions. 17 | -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/ReplayBuffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ReplayBuffer: 4 | def __init__(self, max_size=1000000): 5 | self.buffer = [] 6 | self.max_size = int(max_size) 7 | self.size = 0 8 | 9 | def add(self, transition): 10 | self.size +=1 11 | # transiton is tuple of (state, action, reward, next_state, done) 12 | self.buffer.append(transition) 13 | 14 | def sample(self, batch_size): 15 | # delete 1/5th of the buffer when full 16 | if self.size > self.max_size: 17 | del self.buffer[0:int(self.size/5)] 18 | self.size = len(self.buffer) 19 | 20 | indexes = np.random.randint(0, len(self.buffer), size=batch_size) 21 | state, action, reward, next_state, done = [], [], [], [], [] 22 | 23 | for i in indexes: 24 | s, a, r, s_, d = self.buffer[i] 25 | state.append(np.array(s, copy=False)) 26 | action.append(np.array(a, copy=False)) 27 | reward.append(np.array(r, copy=False)) 28 | next_state.append(np.array(s_, copy=False)) 29 | done.append(np.array(d, copy=False)) 30 | 31 | return np.array(state), np.array(action), np.array(reward).reshape(-1, 1), np.array(next_state), np.array(done).reshape(-1, 1) 32 | -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_actor.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_actor_target.pth: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_actor_target.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_critic_1_target.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_critic_1_target.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_critic_2_target.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_critic_2_target.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_crtic_1.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_crtic_1.pth -------------------------------------------------------------------------------- /OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_crtic_2.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/BipedalWalker-v3/preTrained/TD3_BipedalWalker-v3_0_solved_crtic_2.pth -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | /logs 3 | -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/CartPole-v0.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import gym\n", 18 | "import sys\n", 19 | "import numpy as np\n", 20 | "import random as rn\n", 21 | "import cv2\n", 22 | "import time\n", 23 | "import functools\n", 24 | "import datetime\n", 25 | "import tensorflow as tf\n", 26 | "from agents.DDQN import *\n", 27 | "from IPython import display\n", 28 | "import matplotlib\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "from matplotlib import gridspec\n", 31 | "%matplotlib inline\n", 32 | "\n", 33 | "np.set_printoptions(threshold=sys.maxsize)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "#### Set seed" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "SEED = 789325\n", 50 | "\n", 51 | "rn.seed(SEED)\n", 52 | "np.random.seed(SEED)\n", 53 | 
"tf.random.set_seed(SEED)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "#### Environment" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "def build_environment(envName=\"CartPole-v0\", seed=None):\n", 70 | " env = gym.make(envName) \n", 71 | " if seed is not None:\n", 72 | " env.seed(seed) \n", 73 | " \n", 74 | " return env" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "#### Show Environment information" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 4, 87 | "metadata": { 88 | "scrolled": true 89 | }, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Actions: 2\n", 96 | "Size of state: 4\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "env = build_environment(seed=SEED)\n", 102 | "\n", 103 | "# size of each action\n", 104 | "action_size = env.action_space.n\n", 105 | "print('Actions: ', action_size)\n", 106 | "if hasattr(env.env, 'get_action_meanings'):\n", 107 | " print(env.env.get_action_meanings())\n", 108 | "\n", 109 | "# examine the state space \n", 110 | "states = env.observation_space.shape\n", 111 | "state_size = states[0]\n", 112 | "print('Size of state:', state_size)\n", 113 | "\n", 114 | "env.close()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "# Training" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 5, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "def build_agent(pre_trained=None):\n", 131 | " return DDQNAgent(state_size,\n", 132 | " action_size,\n", 133 | " buffer_size=2000,\n", 134 | " epsilon_start=0.5,\n", 135 | " epsilon_steps_to_min=3500,\n", 136 | " mode=\"DuelingDQN\",\n", 137 | " use_PER=True,\n", 138 | " pre_trained=pre_trained)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 6, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "logdir = \"logs/\" + time.strftime(\"%Y%m%d_%H%M%S\")\n", 148 | "writer = tf.summary.create_file_writer(logdir)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 7, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "SAVE_EVERY_EPISODES = 100\n", 158 | "LEARNING_START_AFTER_STEPS = 500\n", 159 | "EPISODES = 80\n", 160 | "SCORE_TO_SOLVE = 195.0\n", 161 | "\n", 162 | "UPDATE_MODE = 'soft'\n", 163 | "UPDATE_TARGET_FREQUENCY = 10" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 8, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "Training started: 2019-12-29 13:29:18.235840\n", 176 | "Episode 1: Step 15 reward 15.0: \n", 177 | "Save model...\n", 178 | "Episode 5: Step 80 reward 30.0: \n", 179 | "Save model...\n", 180 | "Episode 6: Step 123 reward 43.0: \n", 181 | "Save model...\n", 182 | "Episode 7: Step 180 reward 57.0: \n", 183 | "Save model...\n" 184 | ] 185 | }, 186 | { 187 | "name": "stderr", 188 | "output_type": "stream", 189 | "text": [ 190 | "D:\\Deep Learning\\Reinforcement-Learning\\OpenAI\\CartPole-v0\\memory.py:47: RuntimeWarning: divide by zero encountered in double_scalars\n", 191 | " max_weight = (p_min * n) ** (-self.PER_b)\n", 192 | "D:\\Deep Learning\\Reinforcement-Learning\\OpenAI\\CartPole-v0\\memory.py:47: RuntimeWarning: divide by zero encountered in 
double_scalars\n", 193 | " max_weight = (p_min * n) ** (-self.PER_b)\n" 194 | ] 195 | }, 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "Episode 46: Step 860 reward 58.0: \n", 201 | "Save model...\n", 202 | "Episode 47: Step 947 reward 87.0: \n", 203 | "Save model...\n", 204 | "Episode 61: Step 1757 reward 92.0: \n", 205 | "Save model...\n", 206 | "Episode 65: Step 2039 reward 105.0: \n", 207 | "Save model...\n", 208 | "Episode 67: Step 2269 reward 133.0: \n", 209 | "Save model...\n", 210 | "Episode 68: Step 2469 reward 200.0: \n", 211 | "Save model...\n", 212 | "Save model...\n", 213 | "Training finished\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "def train():\n", 219 | " env = build_environment(seed=SEED)\n", 220 | " agent = build_agent()\n", 221 | " \n", 222 | " max_reward = -9999999 \n", 223 | " game_rewards_deque = deque(maxlen=100) \n", 224 | " frame_count = 0\n", 225 | " \n", 226 | " print(\"Training started: \" + str(datetime.datetime.now()))\n", 227 | " \n", 228 | " frame_count = 0\n", 229 | " \n", 230 | " for i_episode in range(1, EPISODES+1):\n", 231 | " state = env.reset()\n", 232 | " \n", 233 | " game_reward = 0\n", 234 | " steps = 0\n", 235 | " \n", 236 | " while True:\n", 237 | " frame_count += 1\n", 238 | " steps += 1\n", 239 | " \n", 240 | " state = agent.preprocess(state) \n", 241 | " action = agent.act(state) \n", 242 | " \n", 243 | " next_state, reward, done, info = env.step(action) \n", 244 | " game_reward += reward\n", 245 | " \n", 246 | " agent.remember(state[0], action, reward, next_state, done)\n", 247 | " \n", 248 | " state = next_state\n", 249 | " \n", 250 | " if frame_count % 10000 == 0:\n", 251 | " print(\"Step count: {}\".format(frame_count))\n", 252 | " \n", 253 | " if done:\n", 254 | " break \n", 255 | " \n", 256 | " if frame_count > LEARNING_START_AFTER_STEPS: \n", 257 | " agent.train()\n", 258 | " if UPDATE_MODE == \"soft\":\n", 259 | " agent.soft_update_target_network()\n", 260 | " \n", 261 | " \n", 262 | " if UPDATE_MODE == \"hard\" and frame_count % UPDATE_TARGET_FREQUENCY == 0:\n", 263 | " agent.hard_update_target_network()\n", 264 | " \n", 265 | " # Log episode reward\n", 266 | " with writer.as_default():\n", 267 | " tf.summary.scalar(\"epsilon\", agent.epsilon, step=i_episode)\n", 268 | " tf.summary.scalar(\"game_reward\", game_reward, step=i_episode) \n", 269 | " \n", 270 | " if i_episode % SAVE_EVERY_EPISODES == 0:\n", 271 | " print(\"Save after {} episodes.\".format(i_episode))\n", 272 | " agent.save() \n", 273 | " \n", 274 | " game_rewards_deque.append(game_reward)\n", 275 | " \n", 276 | " if game_reward > max_reward:\n", 277 | " print(\"Episode {}: Step {} reward {}: \".format(i_episode, frame_count, game_reward))\n", 278 | " max_reward = game_reward\n", 279 | " agent.save() \n", 280 | " \n", 281 | " if np.mean(game_rewards_deque) >= SCORE_TO_SOLVE:\n", 282 | " agent.save()\n", 283 | " print(\"Solved in Episode {} Step {} reward {}: \".format(i_episode, frame_count, game_reward))\n", 284 | " break \n", 285 | " \n", 286 | " env.close()\n", 287 | " agent.save()\n", 288 | " \n", 289 | "train()\n", 290 | "print(\"Training finished\")" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "# Show Result" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 9, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "Episode finished with score: 161.0\n" 310 | ] 311 
| }, 312 | { 313 | "data": { 314 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD8CAYAAABXe05zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAARYUlEQVR4nO3df6zddX3H8edrpVYjJMK4kNofo3M1GZhZ3E1nwrIwcdKxH8U/WEoy0z9Iyh+QaGaygSZD/mjiFn/sn2FWB7HZ1K6JEjrCNmunMSaO0mLBllK5SoVrm7bojLA/6lre++N+Ow7ltvf03nu8/ZzzfCQn5/v9fL/fc95vQl98+ZzP6UlVIUlqx68sdAGSpAtjcEtSYwxuSWqMwS1JjTG4JakxBrckNWZgwZ1kXZJDSSaS3DOo95GkUZNBrONOsgj4PvAHwCTwBHB7VT0z728mSSNmUHfca4GJqvphVf0C2AasH9B7SdJIuWRAr7sMeLFnfxL4nXOdfOWVV9Y111wzoFIkqT2HDx/mpZdeynTHBhXc073Z6+ZkkmwCNgGsXLmSPXv2DKgUSWrP+Pj4OY8NaqpkEljRs78cONJ7QlVtqarxqhofGxsbUBmSNHwGFdxPAKuTrEryJmADsGNA7yVJI2UgUyVVdSrJ3cB/AIuAh6rqwCDeS5JGzaDmuKmqx4DHBvX6kjSq/OakJDXG4JakxhjcktQYg1uSGmNwS1JjDG5JaozBLUmNMbglqTEGtyQ1xuCWpMYY3JLUGINbkhpjcEtSYwxuSWqMwS1JjTG4JakxBrckNcbglqTGzOmny5IcBl4GTgOnqmo8yRXAvwDXAIeBP6uq/55bmZKkM+bjjvv3q2pNVY13+/cAu6pqNbCr25ckzZNBTJWsB7Z221uBWwfwHpI0suYa3AV8LcneJJu6saur6ihA93zVHN9DktRjTnPcwA1VdSTJVcDOJM/2e2EX9JsAVq5cOccyJGl0zOmOu6qOdM/HgYeBtcCxJEsBuufj57h2S1WNV9X42NjYXMqQpJEy6+BO8tYkl53ZBj4A7Ad2ABu70zYCj8y1SEnSa+YyVXI18HCSM6/zpar69yRPANuT3AG8ANw29zIlSWfMOrir6ofAu6cZ/wlw01yKkiSdm9+clKTGGNyS1BiDW5IaY3BLUmMMbklqjMEtSY0xuCWpMQa3JDXG4JakxhjcktQYg1uSGmNwS1JjDG5JaozBLUmNMbglqTEGtyQ1xuCWpMYY3JLUGINbkhozY3AneSjJ8ST7e8auSLIzyXPd8+U9x+5NMpHkUJKbB1W4JI2qfu64vwCsO2vsHmBXVa0GdnX7JLkW2ABc113zQJJF81atJGnm4K6qbwE/PWt4PbC1294K3Nozvq2qTlbV88AEsHaeapUkMfs57qur6ihA93xVN74MeLHnvMlu7A2SbEqyJ8meEydOzLIMSRo98/3hZKYZq+lOrKotVTVeVeNjY2PzXIYkDa/ZBvexJEsBuufj3fgksKLnvOXAkdmXJ0k622yDewewsdveCDzSM74hyZIkq4DVwO65lShJ6nXJTCck+TJwI3BlkkngPuCTwPYkdwAvALcBVNWBJNuBZ4BTwF1VdXpAtUvSSJoxuKvq9nMcuukc528GNs+lKEnSufnNSUlqjMEtSY0xuCWpMQa3JDXG4JakxhjcktQYg1uSGmNwS1JjDG5JaozBLUmNMbglqTEGtyQ1xuCWpMYY3JLUGINbkhpjcEtSYwxuSWqMwS1JjZkxuJM8lOR4kv09Y59I8uMk+7rHLT3H7k0ykeRQkpsHVbgkjap+7ri/AKybZvyzVbWmezwGkORaYANwXXfNA0kWzVexkqQ+gruqvgX8tM/XWw9sq6qTVfU8MAGsnUN9kqSzzGWO++4kT3dTKZd3Y8uAF3vOmezG3iDJpiR7kuw5ceLEHMqQpNEy2+D+HPAOYA1wFPh0N55pzq3pXqCqtlTVeFWNj42NzbIMSRo9swruqjpWVaer6lXg87w2HTIJrOg5dTlwZG4lSpJ6zSq4kyzt2f0gcGbFyQ5gQ5IlSVYBq4HdcytRktTrkplOSPJl4EbgyiSTwH3AjUnWMDUNchi4E6CqDiTZDjwDnALuqqrTgyldkkbTjMFdVbdPM/zgec7fDGyeS1GSpHPzm5OS1BiDW5IaY3BLUmMMbklqjMEtSY0xuCWpMQa3JDVmxnXcUiv2brnzdfu/vekfFqgSabC849bQOjvIpWFhcEtSYwxuSWqMwa2hMN20iHPcGlYGtyQ1xuBW8/wQUqPG4JakxhjcktQYg1uSGmNwayi5okTDbMbgTrIiyTeSHExyIMmHu/ErkuxM8lz3fHnPNfcmmUhyKMnNg2xAkkZNP3fcp4CPVtVvAu8F7kpyLXAPsKuqVgO7un26YxuA64B1wANJFg2ieMkVJRpFMwZ3VR2tqie77ZeBg8AyYD2wtTttK3Brt70e2FZVJ6vqeWACWDvfhUvSqLqgOe4k1wDXA48DV1fVUZgKd+Cq7rRlwIs9l012Y2e/1qYke5LsOXHixIVXLkkjqu/gTnIp8BXgI1X18/OdOs1YvWGgaktVjVfV+NjYWL9lSDPyg0kNu76CO8lipkL7i1X11W74WJKl3fGlwPFufBJY0XP5cuDI/JQrSepnVUmAB4GDVfWZnkM7gI3d9kbgkZ7xDUmWJFkFrAZ2z1/J0hQ/mNSo6ucXcG4APgR8L8m+buxjwCeB7UnuAF4AbgOoqgNJtgPPMLUi5a6qOj3vlUvSiJoxuKvq20w/bw1w0zmu2QxsnkNdkqRz8JuTktQYg1tDxRUlGgUGtyQ1xuBWk1xRolFmcEtSYwxuSWqMwa2h4QeTGhUGtyQ1xuBWc/xgUqPO4JakxhjcktQYg1tDwQ8mNUoMbklqjMEtSY0xuNWU6VaUOE2iUWNwS1JjDG41w/Xb0hSDW5Ia08+PBa9I8o0kB5McSPLhbvwTSX6cZF/3uKXnmnuTTCQ5lOTmQTYgSaOmnx8LPgV8tKqeTHIZsDfJzu7YZ6vqU70nJ7kW2ABcB7wd+HqSd/qDwRoEP5jUKJrxjruqjlbVk932y8BBYNl5LlkPbKuqk1X1PDABrJ2PYiVJFzjHneQa4Hrg8W7o7iRPJ3koyeXd2DLgxZ7LJjl/0Esz8oNJ6TV9B3eSS4GvAB+pqp8DnwPeAawBjgKfPnPqNJfXNK+3KcmeJHtOnDhxwYVL0qjqK7iTLGYqtL9YVV8FqKpjVXW6ql4FPs9r0yGTwIqey5cDR85+zaraUlXjVTU+NjY2lx4kaaT0s6okwIPAwar6TM/40p7TPgjs77Z3ABuSLEmyClgN7J6/kiVptPWzquQG4EPA95Ls68Y+BtyeZA1T0yCHgTsBqupAku3AM0ytSLnLFSUaBFeUaFTNGNxV9W2mn7d+7DzXbAY2z6EuSdI5+M1JXfRcUSK9nsEtSY0xuCWpMQa3muQHkxplBrckNcbg1kXNDyalNz
K4JakxBrckNcbgVnP8YFKjzuCWpMYY3JLUGINbFy1XlEjTM7glqTEGt35pklzQYy6vIw0zg1tNGb9zy0KXIC24fn5IQVoQ/3pk0/9v/8nbDWzpDO+4dVG67749r9vvDXFp1BncaobTJNKUfn4s+M1Jdid5KsmBJPd341ck2Znkue758p5r7k0ykeRQkpsH2YAkjZp+7rhPAu+rqncDa4B1Sd4L3APsqqrVwK5unyTXAhuA64B1wANJFg2ieA2vs+e0neOWXtPPjwUX8Eq3u7h7FLAeuLEb3wp8E/irbnxbVZ0Enk8yAawFvjOfhWu4TU2LvBbW9y9cKdJFp69VJd0d817gN4C/r6rHk1xdVUcBqupokqu605cB/9Vz+WQ3dk579+517a3mlf8+aZj1FdxVdRpYk+RtwMNJ3nWe06f7E1NvOCnZBGwCWLlyJT/60Y/6KUUN+2WG6dT/KErtGh8fP+exC1pVUlU/Y2pKZB1wLMlSgO75eHfaJLCi57LlwJFpXmtLVY1X1fjY2NiFlCFJI62fVSVj3Z02Sd4CvB94FtgBbOxO2wg80m3vADYkWZJkFbAa2D3fhUvSqOpnqmQpsLWb5/4VYHtVPZrkO8D2JHcALwC3AVTVgSTbgWeAU8Bd3VSLJGke9LOq5Gng+mnGfwLcdI5rNgOb51ydJOkN/OakJDXG4JakxhjcktQY/1pX/dK4tlqaH95xS1JjDG5JaozBLUmNMbglqTEGtyQ1xuCWpMYY3JLUGINbkhpjcEtSYwxuSWqMwS1JjTG4JakxBrckNcbglqTG9PNjwW9OsjvJU0kOJLm/G/9Ekh8n2dc9bum55t4kE0kOJbl5kA1I0qjp5+/jPgm8r6peSbIY+HaSf+uOfbaqPtV7cpJrgQ3AdcDbga8neac/GCxJ82PGO+6a8kq3u7h7nO9vxF8PbKuqk1X1PDABrJ1zpZIkoM857iSLkuwDjgM7q+rx7tDdSZ5O8lCSy7uxZcCLPZdPdmOSpHnQV3BX1emqWgMsB9YmeRfwOeAdwBrgKPDp7vRM9xJnDyTZlGRPkj0nTpyYVfGSNIouaFVJVf0M+CawrqqOdYH+KvB5XpsOmQRW9Fy2HDgyzWttqarxqhofGxubVfGSNIr6WVUyluRt3fZbgPcDzyZZ2nPaB4H93fYOYEOSJUlWAauB3fNbtiSNrn5WlSwFtiZZxFTQb6+qR5P8U5I1TE2DHAbuBKiqA0m2A88Ap4C7XFEiSfNnxuCuqqeB66cZ/9B5rtkMbJ5baZKk6fjNSUlqjMEtSY0xuCWpMQa3JDXG4JakxhjcktQYg1uSGmNwS1JjDG5JaozBLUmNMbglqTEGtyQ1xuCWpMYY3JLUGINbkhpjcEtSYwxuSWqMwS1JjTG4JakxBrckNcbglqTGGNyS1JhU1ULXQJITwP8ALy10LQNwJfbVmmHtzb7a8mtVNTbdgYsiuAGS7Kmq8YWuY77ZV3uGtTf7Gh5OlUhSYwxuSWrMxRTcWxa6gAGxr/YMa2/2NSQumjluSVJ/LqY7bklSHxY8uJOsS3IoyUSSexa6nguV5KEkx5Ps7xm7IsnOJM91z5f3HLu36/VQkpsXpuqZJVmR5BtJDiY5kOTD3XjTvSV5c5LdSZ7q+rq/G2+6rzOSLEry3SSPdvvD0tfhJN9Lsi/Jnm5sKHqblapasAewCPgB8OvAm4CngGsXsqZZ9PB7wHuA/T1jfwvc023fA/xNt31t1+MSYFXX+6KF7uEcfS0F3tNtXwZ8v6u/6d6AAJd224uBx4H3tt5XT39/AXwJeHRY/l3s6j0MXHnW2FD0NpvHQt9xrwUmquqHVfULYBuwfoFruiBV9S3gp2cNrwe2dttbgVt7xrdV1cmqeh6YYOqfwUWnqo5W1ZPd9svAQWAZjfdWU17pdhd3j6LxvgCSLAf+CPjHnuHm+zqPYe7tvBY6uJcBL/bsT3Zjrbu6qo7CVAACV3XjTfab5BrgeqbuTpvvrZtO2AccB3ZW1VD0Bfwd8JfAqz1jw9AXTP3H9WtJ9ibZ1I0NS28X7JIFfv9MMzbMy1ya6zfJpcBXgI9U1c+T6VqYOnWasYuyt6o6DaxJ8jbg4STvOs/pTfSV5I+B41W1N8mN/VwyzdhF11ePG6rqSJKrgJ1Jnj3Pua31dsEW+o57EljRs78cOLJAtcynY0mWAnTPx7vxpvpNspip0P5iVX21Gx6K3gCq6mfAN4F1tN/XDcCfJjnM1JTj+5L8M+33BUBVHemejwMPMzX1MRS9zcZCB/cTwOokq5K8CdgA7FjgmubDDmBjt70ReKRnfEOSJUlWAauB3QtQ34wydWv9IHCwqj7Tc6jp3pKMdXfaJHkL8H7gWRrvq6rurarlVXUNU3+O/rOq/pzG+wJI8tYkl53ZBj4A7GcIepu1hf50FLiFqRULPwA+vtD1zKL+LwNHgf9l6r/0dwC/CuwCnuuer+g5/+Ndr4eAP1zo+s/T1+8y9b+XTwP7usctrfcG/Bbw3a6v/cBfd+NN93VWjzfy2qqS5vtiatXZU93jwJmcGIbeZvvwm5OS1JiFniqRJF0gg1uSGmNwS1JjDG5JaozBLUmNMbglqTEGtyQ1xuCWpMb8H8EOG9Pp82HgAAAAAElFTkSuQmCC\n", 315 | "text/plain": [ 316 | "
" 317 | ] 318 | }, 319 | "metadata": { 320 | "needs_background": "light" 321 | }, 322 | "output_type": "display_data" 323 | } 324 | ], 325 | "source": [ 326 | "env = build_environment(seed=SEED)\n", 327 | "agent = build_agent(pre_trained='model.h5')\n", 328 | "\n", 329 | "state = env.reset()\n", 330 | "final_reward = 0\n", 331 | "\n", 332 | "img = plt.imshow(env.render(mode='rgb_array'))\n", 333 | "while True:\n", 334 | " img.set_data(env.render(mode='rgb_array'))\n", 335 | " display.display(plt.gcf())\n", 336 | " display.clear_output(wait=True)\n", 337 | "\n", 338 | " state = np.reshape(state, [1, state_size])\n", 339 | " action = agent.act(state) \n", 340 | " next_state, reward, done, info = env.step(action)\n", 341 | " final_reward += reward \n", 342 | " \n", 343 | " state = next_state\n", 344 | " \n", 345 | " if done:\n", 346 | " print(\"Episode finished with score: {}\".format(final_reward))\n", 347 | " break\n", 348 | "env.close() " 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [] 357 | } 358 | ], 359 | "metadata": { 360 | "kernelspec": { 361 | "display_name": "Python 3", 362 | "language": "python", 363 | "name": "python3" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.6.9" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 2 380 | } -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/README.md: -------------------------------------------------------------------------------- 1 | # CartPole-v0 2 | Reinforcement Learning project to train a neural network to play the 3 | [OpenAI](https://openai.com/) environment [CartPole-v0](https://github.com/openai/gym/wiki/CartPole-v0). 4 | ![CartPole-0](assets/cartpole-v0.jpg "CartPole-v0") 5 | 6 | ### Objectives 7 | 8 | 9 | ## Additional Information 10 | Tensorflow Version: GPU 2.0.0 11 | 12 | ## Installation 13 | 1. Create and activate a new environment. 14 | ``` 15 | conda create -n openai python=3.6 16 | source activate openai 17 | ``` 18 | 2. Install Dependencies. 19 | ``` 20 | pip install -r requirements.txt 21 | pip install gym[atari] 22 | ``` 23 | 24 | ### Launch Jupyter notebook 25 | ``` 26 | jupyter notebook CartPole-v0.ipynb 27 | ``` 28 | 29 | ### Additional commands 30 | Starts Tensorboard Visualisation. 
31 | ``` 32 | tensorboard --logdir=logs/ 33 | ``` 34 | 35 | #### Rewards 36 | ![Reward](assets/game_reward.png "Reward") 37 | 38 | -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/agents/DDQN.py: -------------------------------------------------------------------------------- 1 | from memory import * 2 | from keras.layers import * 3 | from keras.models import * 4 | from keras.optimizers import * 5 | from keras.initializers import * 6 | 7 | class DDQNAgent(object): 8 | def __init__(self, 9 | state_size, 10 | action_size, 11 | buffer_size=10000, 12 | batch_size=32, 13 | gamma=0.99, 14 | epsilon_start=1.0, 15 | epsilon_min=0.1, 16 | epsilon_steps_to_min=1000, 17 | tau=0.1, 18 | mode='QNetwork', 19 | use_PER=True, 20 | pre_trained=None): 21 | 22 | self.state_size = state_size 23 | self.action_size = action_size 24 | 25 | 26 | self.batch_size = batch_size 27 | self.gamma = gamma 28 | self.epsilon = epsilon_start 29 | self.epsilon_min = epsilon_min 30 | self.epsilon_step = (self.epsilon - self.epsilon_min) / epsilon_steps_to_min 31 | self.tau = tau 32 | 33 | self.model = self.build_model(mode, pre_trained) 34 | self.target_model = self.build_model(mode, pre_trained) 35 | self.hard_update_target_network() 36 | 37 | self.use_PER = use_PER 38 | 39 | if self.use_PER: 40 | self.replay_buffer = PrioritizedReplayBuffer(capacity=buffer_size) 41 | else: 42 | self.replay_buffer = Memory(max_size=buffer_size) 43 | 44 | def build_model(self, mode, pre_trained): 45 | model = Sequential() 46 | model.add(Dense(64, input_dim=self.state_size, activation='relu')) 47 | model.add(Dense(64, activation='relu')) 48 | 49 | if mode == "QNetwork": 50 | model.add(Dense(self.action_size, activation='linear')) 51 | 52 | if mode == "DuelingDQN": 53 | model.add(Dense(self.action_size + 1, activation='linear')) 54 | model.add(Lambda(lambda i: K.expand_dims(i[:,0],-1) + i[:,1:] - K.mean(i[:,1:], keepdims=True), 55 | output_shape=(self.action_size,))) 56 | 57 | if pre_trained: 58 | model = load_model(pre_trained) 59 | 60 | model.compile(optimizer=Adam(lr=0.001), loss='mse') 61 | return model 62 | 63 | def hard_update_target_network(self): 64 | pars = self.model.get_weights() 65 | self.target_model.set_weights(pars) 66 | 67 | def soft_update_target_network(self): 68 | pars_behavior = self.model.get_weights() 69 | pars_target = self.target_model.get_weights() 70 | 71 | ctr = 0 72 | for par_behavior,par_target in zip(pars_behavior,pars_target): 73 | par_target = par_target*(1-self.tau) + par_behavior*self.tau 74 | pars_target[ctr] = par_target 75 | ctr += 1 76 | 77 | self.target_model.set_weights(pars_target) 78 | 79 | def remember(self, state, action, reward, next_state, done): 80 | self.replay_buffer.add((state, action, reward, next_state, done)) 81 | 82 | def preprocess(self, state): 83 | return np.reshape(state, [1, self.state_size]) 84 | 85 | def act(self, state): 86 | # Update exploration rate 87 | if self.epsilon > self.epsilon_min: 88 | self.epsilon -= self.epsilon_step 89 | 90 | # Choose Action 91 | if np.random.rand() <= self.epsilon: 92 | action = np.random.choice(self.action_size) 93 | else: 94 | Qs = self.model.predict(state)[0] 95 | action = np.argmax(Qs) 96 | 97 | return action 98 | 99 | def train(self): 100 | indices, mini_batch, importance = self.replay_buffer.sample(self.batch_size) 101 | 102 | states = [] 103 | actions = [] 104 | rewards = [] 105 | next_states = [] 106 | dones = [] 107 | 108 | Q_wants = [] 109 | td_errors = np.zeros(self.batch_size) 110 | 111 | 
for i in range(len(mini_batch)): 112 | if not self.use_PER: 113 | state, action, reward, next_state, done = mini_batch[i] 114 | else: 115 | state = mini_batch[i][0][0] 116 | action = mini_batch[i][0][1] 117 | reward = mini_batch[i][0][2] 118 | next_state = mini_batch[i][0][3] 119 | done = mini_batch[i][0][4] 120 | 121 | states.append(state) 122 | actions.append(action) 123 | rewards.append(reward) 124 | next_states.append(next_state) 125 | dones.append(done) 126 | 127 | states_tensor = np.reshape(states,(self.batch_size,len(states[0]))) 128 | Q_wants_pred = self.model.predict(states_tensor) 129 | 130 | next_states_tensor = np.reshape(next_states,(self.batch_size,len(next_states[0]))) 131 | Q_next_state_vecs = self.model.predict(next_states_tensor) 132 | Q_target_next_state_vecs = self.target_model.predict(next_states_tensor) 133 | 134 | for i in range(len(mini_batch)): 135 | action = actions[i] 136 | reward = rewards[i] 137 | done = dones[i] 138 | 139 | Q_want = Q_wants_pred[i] 140 | Q_want_old = Q_want[action] 141 | 142 | if done: 143 | Q_want[action] = reward 144 | else: 145 | Q_next_state_vec = Q_next_state_vecs[i] 146 | action_max = np.argmax(Q_next_state_vec) 147 | 148 | Q_target_next_state_vec = Q_target_next_state_vecs[i] 149 | Q_target_next_state_max = Q_target_next_state_vec[action_max] 150 | 151 | Q_want[action] = reward + self.gamma*Q_target_next_state_max 152 | Q_want_tensor = np.reshape(Q_want,(1,len(Q_want))) 153 | 154 | Q_wants.append(Q_want) 155 | td_errors[i] = abs(Q_want_old - Q_want[action]) 156 | 157 | states = np.array(states) 158 | Q_wants = np.array(Q_wants) 159 | self.model.fit(states, Q_wants, verbose=False, epochs=1) 160 | 161 | # update replay buffer 162 | self.replay_buffer.batch_update(indices, np.array(td_errors)) 163 | 164 | def save(self, file='model.h5'): 165 | print('Save model...') 166 | self.model.save(file) 167 | -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/CartPole-v0/agents/__init__.py -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/assets/cartpole-v0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/CartPole-v0/assets/cartpole-v0.jpg -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/assets/game_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/CartPole-v0/assets/game_reward.png -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | 4 | class Memory(object): 5 | def __init__(self, max_size=2000): 6 | self.max_size = max_size 7 | self.buffer = deque(maxlen=max_size) 8 | 9 | def add(self, experience): 10 | if len(self.buffer) <= self.max_size: 11 | self.buffer.append(experience) 12 | else: 13 | self.buffer[0] = experience 14 | 15 | def sample(self, batch_size): 16 | return 
[], rn.sample(self.buffer, batch_size), [] 17 | 18 | def batch_update(self, indices, td_errors): 19 | pass 20 | 21 | class PrioritizedReplayBuffer(object): 22 | PER_e = 0.01 23 | PER_a = 0.6 24 | PER_b = 0.4 25 | 26 | PER_b_increment_per_sampling = 0.001 27 | 28 | absolute_error_upper = 1. 29 | 30 | def __init__(self, capacity): 31 | self.tree = SumTree(capacity) 32 | 33 | def add(self, experience): 34 | max_priority = np.max(self.tree.tree[-self.tree.capacity:]) 35 | 36 | if max_priority == 0: 37 | max_priority = self.absolute_error_upper 38 | 39 | self.tree.add(max_priority, experience) 40 | 41 | def sample(self, n): 42 | memory_b = [] 43 | b_idx, b_ISWeights = np.empty((n,), dtype=np.int32), np.empty((n, 1), dtype=np.float32) 44 | priority_segment = self.tree.total_priority / n 45 | self.PER_b = np.min([1., self.PER_b + self.PER_b_increment_per_sampling]) 46 | p_min = np.min(self.tree.tree[-self.tree.capacity:]) / self.tree.total_priority 47 | max_weight = (p_min * n) ** (-self.PER_b) 48 | 49 | for i in range(n): 50 | a, b = priority_segment * i, priority_segment * (i + 1) 51 | value = np.random.uniform(a, b) 52 | 53 | index, priority, data = self.tree.get_leaf(value) 54 | 55 | sampling_probabilities = priority / self.tree.total_priority 56 | 57 | b_ISWeights[i, 0] = np.power(n * sampling_probabilities, -self.PER_b)/ max_weight 58 | 59 | b_idx[i]= index 60 | 61 | experience = [data] 62 | 63 | memory_b.append(experience) 64 | 65 | return b_idx, memory_b, b_ISWeights 66 | 67 | def batch_update(self, tree_idx, abs_errors): 68 | abs_errors += self.PER_e 69 | clipped_errors = np.minimum(abs_errors, self.absolute_error_upper) 70 | ps = np.power(clipped_errors, self.PER_a) 71 | 72 | for ti, p in zip(tree_idx, ps): 73 | self.tree.update(ti, p) 74 | 75 | class SumTree(object): 76 | data_pointer = 0 77 | 78 | def __init__(self, capacity): 79 | self.capacity = capacity 80 | self.tree = np.zeros(2 * capacity - 1) 81 | self.data = np.zeros(capacity, dtype=object) 82 | 83 | def add(self, priority, data): 84 | tree_index = self.data_pointer + self.capacity - 1 85 | self.data[self.data_pointer] = data 86 | self.update (tree_index, priority) 87 | self.data_pointer += 1 88 | 89 | if self.data_pointer >= self.capacity: 90 | self.data_pointer = 0 91 | 92 | def update(self, tree_index, priority): 93 | change = priority - self.tree[tree_index] 94 | self.tree[tree_index] = priority 95 | 96 | while tree_index != 0: 97 | tree_index = (tree_index - 1) // 2 98 | self.tree[tree_index] += change 99 | 100 | def get_leaf(self, v): 101 | parent_index = 0 102 | 103 | while True: 104 | left_child_index = 2 * parent_index + 1 105 | right_child_index = left_child_index + 1 106 | 107 | if left_child_index >= len(self.tree): 108 | leaf_index = parent_index 109 | break 110 | 111 | else: 112 | if v <= self.tree[left_child_index]: 113 | parent_index = left_child_index 114 | 115 | else: 116 | v -= self.tree[left_child_index] 117 | parent_index = right_child_index 118 | 119 | data_index = leaf_index - self.capacity + 1 120 | 121 | return leaf_index, self.tree[leaf_index], self.data[data_index] 122 | 123 | @property 124 | def total_priority(self): 125 | return self.tree[0] 126 | -------------------------------------------------------------------------------- /OpenAI/CartPole-v0/model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/CartPole-v0/model.h5 
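As a stand-alone illustration of the `PrioritizedReplayBuffer` defined in `memory.py` above (in the project it is driven by `agents/DDQN.py`), a minimal usage sketch with dummy transitions and dummy TD errors might look like this:

```python
# Dummy-data usage sketch for memory.PrioritizedReplayBuffer (not project code).
import numpy as np
from memory import PrioritizedReplayBuffer

buffer = PrioritizedReplayBuffer(capacity=64)

# New experiences enter with the current maximum leaf priority, so every
# transition is sampled at least once before its priority is refined.
for _ in range(64):
    state, next_state = np.random.rand(4), np.random.rand(4)
    buffer.add((state, 0, 1.0, next_state, False))

# sample() returns SumTree leaf indices, the experiences (each wrapped as
# [transition], which is why DDQN.py indexes mini_batch[i][0]), and the
# importance-sampling weights that correct for the non-uniform sampling.
indices, batch, is_weights = buffer.sample(16)

# After computing TD errors for this batch, feed back their absolute values;
# batch_update() adds PER_e, clips them, and raises them to the PER_a power
# to form the new priorities.
td_errors = np.abs(np.random.randn(16))
buffer.batch_update(indices, td_errors)
```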
-------------------------------------------------------------------------------- /OpenAI/CartPole-v0/requirements.txt: -------------------------------------------------------------------------------- 1 | prompt_toolkit==2.0.10 2 | matplotlib 3 | numpy==1.16.4 4 | pandas 5 | opencv-python 6 | pillow 7 | imutils 8 | scikit-image 9 | tqdm 10 | tensorflow-gpu>=2.4.0 11 | Keras==2.3.1 12 | h5py 13 | ipykernel 14 | jupyter 15 | gym 16 | gym[atari] 17 | -------------------------------------------------------------------------------- /OpenAI/HumanoidPyBulletEnv-v0/HumanoidPyBulletEnv-v0.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## HumanoidPyBulletEnv-v0\n", 8 | "\n", 9 | "In this notebook, you will implement a PPO agent with OpenAI Gym's HumanoidPyBulletEnv-v0 environment." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import math\n", 19 | "import random\n", 20 | "import sys\n", 21 | "import pathlib\n", 22 | "\n", 23 | "import gym\n", 24 | "import pybullet\n", 25 | "import pybulletgym\n", 26 | "import numpy as np\n", 27 | "\n", 28 | "import torch\n", 29 | "import torch.nn as nn\n", 30 | "import torch.optim as optim\n", 31 | "import torch.nn.functional as F\n", 32 | "from torch.distributions import Normal" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from IPython.display import clear_output\n", 42 | "import matplotlib.pyplot as plt\n", 43 | "%matplotlib inline" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "

## Use CUDA
" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "use_cuda = torch.cuda.is_available()\n", 60 | "device = torch.device(\"cuda\" if use_cuda else \"cpu\")" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "

## Create Environments
" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "from multiprocessing_env import SubprocVecEnv\n", 77 | "import time\n", 78 | "\n", 79 | "LOAD_CHECKPOINT = True\n", 80 | "DO_TRAINING = False\n", 81 | "\n", 82 | "num_envs = 8\n", 83 | "env_name = \"HumanoidPyBulletEnv-v0\"\n", 84 | "\n", 85 | "hidden_size = 64\n", 86 | "\n", 87 | "\n", 88 | "policy_optimizer_lr = 0.00005\n", 89 | "policy_stopping_kl = 0.02\n", 90 | "\n", 91 | "value_optimizer_lr = 0.00015\n", 92 | "value_stopping_mse = 25\n", 93 | "\n", 94 | "entropy_loss_weight = 0.01\n", 95 | "\n", 96 | "num_steps = 1024\n", 97 | "mini_batch_size = 64\n", 98 | "ppo_epochs = 15\n", 99 | "threshold_reward = 6000\n", 100 | "\n", 101 | "\n", 102 | "ACTOR_CHECKPOINT_PATH = pathlib.Path(\"./pretrained/\" + \"actor_\" + env_name + \"_checkpoint.pt\")\n", 103 | "ACTOR_FINAL_PATH = pathlib.Path(\"./pretrained/\" + \"actor_\" + env_name + \"_final.pt\")\n", 104 | "\n", 105 | "CRITIC_CHECKPOINT_PATH = pathlib.Path(\"./pretrained/\" + \"critic_\" + env_name + \"_checkpoint.pt\")\n", 106 | "CRITIC_FINAL_PATH = pathlib.Path(\"./pretrained/\" + \"critic_\" + env_name + \"_final.pt\")\n", 107 | "\n", 108 | "def make_env():\n", 109 | " def _thunk():\n", 110 | " env = gym.make(env_name)\n", 111 | " return env\n", 112 | "\n", 113 | " return _thunk\n", 114 | "\n", 115 | "envs = [make_env() for i in range(num_envs)]\n", 116 | "envs = SubprocVecEnv(envs)\n", 117 | "\n", 118 | "env = gym.make(env_name)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "

## Neural Network
" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "class BaseModule(nn.Module):\n", 135 | " def __init__(self):\n", 136 | " super(BaseModule, self).__init__()\n", 137 | "\n", 138 | " def _build_network(self, num_inputs, num_outputs, hidden_size):\n", 139 | " \n", 140 | " if isinstance(hidden_size, int):\n", 141 | " return nn.Sequential(\n", 142 | " nn.Linear(num_inputs, hidden_size),\n", 143 | " nn.ReLU(),\n", 144 | " nn.Linear(hidden_size, num_outputs)\n", 145 | " )\n", 146 | " \n", 147 | " else:\n", 148 | " return nn.Sequential(\n", 149 | " nn.Linear(num_inputs, hidden_size[0]),\n", 150 | " nn.ReLU(),\n", 151 | " *self._build_hidden(hidden_size),\n", 152 | " nn.Linear(hidden_size[-1], num_outputs)\n", 153 | " ) \n", 154 | " \n", 155 | " def _build_hidden(self, hidden_size):\n", 156 | " hidden_layers = []\n", 157 | " for i in range(len(hidden_size)-1): \n", 158 | " hidden_layers.append(nn.Linear(hidden_size[i], hidden_size[i+1]))\n", 159 | " hidden_layers.append(nn.ReLU())\n", 160 | " return hidden_layers \n", 161 | " \n", 162 | "class Actor(BaseModule):\n", 163 | " def __init__(self, num_inputs, num_outputs, hidden_size, std=0.0):\n", 164 | " super(Actor, self).__init__()\n", 165 | " self.model = self._build_network(num_inputs, num_outputs, hidden_size) \n", 166 | " self.log_std = nn.Parameter(torch.ones(1, num_outputs) * std, requires_grad=True)\n", 167 | " \n", 168 | " def forward(self, x):\n", 169 | " mu = self.model(x) \n", 170 | " std = self.log_std.exp().expand_as(mu)\n", 171 | " dist = Normal(mu, std)\n", 172 | " return dist\n", 173 | " \n", 174 | "class Critic(BaseModule):\n", 175 | " def __init__(self, num_inputs, hidden_size):\n", 176 | " super(Critic, self).__init__()\n", 177 | " self.model = self._build_network(num_inputs, 1, hidden_size)\n", 178 | " \n", 179 | " def forward(self, x):\n", 180 | " return self.model(x)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "def plot(frame_idx, rewards):\n", 190 | " clear_output(True)\n", 191 | " plt.figure(figsize=(20,5))\n", 192 | " plt.subplot(131)\n", 193 | " plt.title('frame %s. 
reward: %s' % (frame_idx, rewards[-1]))\n", 194 | " \n", 195 | " mean = []\n", 196 | " for x in range(len(rewards)):\n", 197 | " mean.append(np.array(rewards[:x]).mean())\n", 198 | " \n", 199 | " plt.plot(rewards, label=\"Reward\")\n", 200 | " plt.plot(mean, label=\"mean\")\n", 201 | " plt.legend()\n", 202 | " plt.show()\n", 203 | " \n", 204 | "def test_env(vis=False):\n", 205 | " state = env.reset()\n", 206 | " if vis: env.render()\n", 207 | " done = False\n", 208 | " total_reward = 0\n", 209 | " while not done:\n", 210 | " state = torch.FloatTensor(state).unsqueeze(0).to(device)\n", 211 | " dist = policy_model(state)\n", 212 | " next_state, reward, done, _ = env.step(dist.sample().cpu().numpy()[0])\n", 213 | " state = next_state\n", 214 | " if vis: env.render()\n", 215 | " total_reward += reward\n", 216 | " return total_reward" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "def compute_gae(next_value, rewards, masks, values, gamma=0.99, tau=0.97):\n", 226 | " values = values + [next_value]\n", 227 | " gae = 0\n", 228 | " returns = []\n", 229 | " for step in reversed(range(len(rewards))):\n", 230 | " delta = rewards[step] + gamma * values[step + 1] * masks[step] - values[step]\n", 231 | " gae = delta + gamma * tau * masks[step] * gae\n", 232 | " returns.insert(0, gae + values[step])\n", 233 | " return returns" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "def ppo_iter(mini_batch_size, states, actions, log_probs, returns, advantage):\n", 243 | " batch_size = states.size(0)\n", 244 | " for _ in range(batch_size // mini_batch_size):\n", 245 | " rand_ids = np.random.randint(0, batch_size, mini_batch_size)\n", 246 | " yield states[rand_ids, :], actions[rand_ids, :], log_probs[rand_ids, :], returns[rand_ids, :], advantage[rand_ids, :]\n", 247 | "\n", 248 | "\n", 249 | "def ppo_update(ppo_epochs, mini_batch_size, states, actions, log_probs, returns, advantages, \n", 250 | " clip_param=0.1, value_loss_coef=0.5, entropy_coef=0.01, max_grad_norm=0.5):\n", 251 | " # Policy\n", 252 | " for _ in range(ppo_epochs):\n", 253 | " for state, action, old_log_probs, return_, advantage in ppo_iter(mini_batch_size, states, actions, log_probs, returns, advantages):\n", 254 | " dist = policy_model(state)\n", 255 | " \n", 256 | " entropy = dist.entropy().mean()\n", 257 | " new_log_probs = dist.log_prob(action)\n", 258 | "\n", 259 | " ratio = (new_log_probs - old_log_probs).exp()\n", 260 | " surr1 = ratio * advantage\n", 261 | " surr2 = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantage\n", 262 | "\n", 263 | " policy_loss = -torch.min(surr1, surr2).mean()\n", 264 | " entropy_loss = -entropy.mean() * entropy_loss_weight\n", 265 | " \n", 266 | " policy_optimizer.zero_grad()\n", 267 | " \n", 268 | " if max_grad_norm:\n", 269 | " torch.nn.utils.clip_grad_norm_(policy_model.model.parameters(), max_grad_norm)\n", 270 | " \n", 271 | " (policy_loss + entropy_loss).backward()\n", 272 | " policy_optimizer.step()\n", 273 | " \n", 274 | " with torch.no_grad():\n", 275 | " dist = policy_model(state) \n", 276 | " logpas_pred_all = dist.log_prob(action) \n", 277 | " kl = (new_log_probs - logpas_pred_all).mean()\n", 278 | " if kl.item() > policy_stopping_kl:\n", 279 | " break\n", 280 | " \n", 281 | " # Value \n", 282 | " for _ in range(ppo_epochs):\n", 283 | " for state, action, old_log_probs, return_, 
advantage in ppo_iter(mini_batch_size, states, actions, log_probs, returns, advantages):\n", 284 | " value = value_model.model(state)\n", 285 | " \n", 286 | " value_loss = 0.5 * (return_ - value).pow(2).mean()\n", 287 | " \n", 288 | " value_optimizer.zero_grad()\n", 289 | " \n", 290 | " if max_grad_norm:\n", 291 | " torch.nn.utils.clip_grad_norm_(value_model.model.parameters(), max_grad_norm) \n", 292 | " \n", 293 | " value_loss.backward()\n", 294 | " value_optimizer.step()\n", 295 | " \n", 296 | " with torch.no_grad():\n", 297 | " values_pred_all = value_model.model(state)\n", 298 | " mse = 0.5 * (value - values_pred_all).pow(2).mean()\n", 299 | " if mse.item() > value_stopping_mse:\n", 300 | " break" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "def loadCheckpoint(filename, model):\n", 310 | " checkpoint = torch.load(filename)\n", 311 | " model.load_state_dict(checkpoint['model_state_dict'])\n", 312 | " model.to(device)\n", 313 | " model.eval()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "def saveCheckpoint(filename, epoch, model, optimizer):\n", 323 | " checkpoint = {\n", 324 | " 'epoch': epoch,\n", 325 | " 'model_state_dict': model.state_dict(),\n", 326 | " 'optimizer_state_dict': optimizer.state_dict(),\n", 327 | " }\n", 328 | "\n", 329 | " torch.save(checkpoint, filename)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "num_inputs = envs.observation_space.shape[0]\n", 339 | "num_outputs = envs.action_space.shape[0]\n", 340 | "\n", 341 | "policy_model = Actor(num_inputs, num_outputs, hidden_size).to(device)\n", 342 | "policy_optimizer = optim.Adam(policy_model.parameters(), lr=policy_optimizer_lr) \n", 343 | "\n", 344 | "value_model = Critic(num_inputs, hidden_size).to(device)\n", 345 | "value_optimizer = optim.Adam(value_model.parameters(), lr=value_optimizer_lr)\n", 346 | "\n", 347 | "if LOAD_CHECKPOINT: \n", 348 | " loadCheckpoint(ACTOR_CHECKPOINT_PATH, policy_model)\n", 349 | " loadCheckpoint(CRITIC_CHECKPOINT_PATH, value_model) \n", 350 | " \n", 351 | "print(policy_model)\n", 352 | "print(value_model)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "def train():\n", 362 | " frame_idx = 0\n", 363 | " train_epoch = 0\n", 364 | "\n", 365 | " test_rewards = []\n", 366 | " best_reward = None\n", 367 | "\n", 368 | " state = envs.reset()\n", 369 | " early_stop = False\n", 370 | "\n", 371 | " while not early_stop:\n", 372 | " state = envs.reset()\n", 373 | "\n", 374 | " log_probs = []\n", 375 | " values = []\n", 376 | " states = []\n", 377 | " actions = []\n", 378 | " rewards = []\n", 379 | " masks = []\n", 380 | "\n", 381 | " for _ in range(num_steps):\n", 382 | " state = torch.FloatTensor(state).to(device)\n", 383 | " dist = policy_model(state)\n", 384 | " value = value_model(state)\n", 385 | "\n", 386 | " action = dist.sample()\n", 387 | " next_state, reward, done, _ = envs.step(action.cpu().numpy())\n", 388 | "\n", 389 | " log_prob = dist.log_prob(action)\n", 390 | "\n", 391 | " log_probs.append(log_prob)\n", 392 | " values.append(value)\n", 393 | " rewards.append(torch.FloatTensor(reward).unsqueeze(1).to(device))\n", 394 | " masks.append(torch.FloatTensor(1 - 
done).unsqueeze(1).to(device))\n", 395 | "\n", 396 | " states.append(state)\n", 397 | " actions.append(action)\n", 398 | "\n", 399 | " state = next_state\n", 400 | " frame_idx += 1\n", 401 | "\n", 402 | " next_state = torch.FloatTensor(next_state).to(device)\n", 403 | " next_value = value_model(next_state)\n", 404 | " returns = compute_gae(next_value, rewards, masks, values)\n", 405 | "\n", 406 | " returns = torch.cat(returns).detach()\n", 407 | " log_probs = torch.cat(log_probs).detach()\n", 408 | " values = torch.cat(values).detach()\n", 409 | " states = torch.cat(states)\n", 410 | " actions = torch.cat(actions)\n", 411 | " advantage = returns - values\n", 412 | "\n", 413 | " ppo_update(ppo_epochs, mini_batch_size, states, actions, log_probs, returns, advantage)\n", 414 | " train_epoch += 1 \n", 415 | "\n", 416 | " if train_epoch % 10 == 0:\n", 417 | " test_reward = np.mean([test_env() for _ in range(10)]) \n", 418 | " test_rewards.append(test_reward)\n", 419 | " plot(train_epoch, test_rewards)\n", 420 | "\n", 421 | " if best_reward is None or best_reward < test_reward: \n", 422 | " if best_reward is not None: \n", 423 | " saveCheckpoint(ACTOR_FINAL_PATH, train_epoch, policy_model, policy_optimizer)\n", 424 | " saveCheckpoint(CRITIC_FINAL_PATH, train_epoch, value_model, value_optimizer)\n", 425 | "\n", 426 | " best_reward = test_reward\n", 427 | "\n", 428 | " if test_reward > threshold_reward: \n", 429 | " early_stop = True \n", 430 | "\n", 431 | " if train_epoch % 100 == 0:\n", 432 | " saveCheckpoint(ACTOR_CHECKPOINT_PATH, train_epoch, policy_model, policy_optimizer)\n", 433 | " saveCheckpoint(CRITIC_CHECKPOINT_PATH, train_epoch, value_model, value_optimizer)\n", 434 | " \n", 435 | "if DO_TRAINING:\n", 436 | " train()" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "### Replay" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "env = gym.make(env_name)\n", 453 | "env.render(mode=\"human\")\n", 454 | "\n", 455 | "for i_episode in range(5):\n", 456 | " \n", 457 | " state = env.reset()\n", 458 | " done = False\n", 459 | " total_reward = 0\n", 460 | " \n", 461 | " frame_idx = 0\n", 462 | " \n", 463 | " distance = 3\n", 464 | " yaw = 0\n", 465 | " \n", 466 | " humanPos, humanOrn = pybullet.getBasePositionAndOrientation(1)\n", 467 | " pybullet.resetDebugVisualizerCamera(distance, yaw, -20, humanPos) \n", 468 | " \n", 469 | " while not done:\n", 470 | " frame_idx += 1\n", 471 | " \n", 472 | " state = torch.FloatTensor(state).unsqueeze(0).to(device)\n", 473 | " dist = policy_model(state)\n", 474 | " action = dist.sample().cpu().numpy()[0]\n", 475 | " next_state, reward, done, _ = env.step(action)\n", 476 | " \n", 477 | " state = next_state\n", 478 | " total_reward += reward\n", 479 | " \n", 480 | " time.sleep(1/30)\n", 481 | " \n", 482 | " if frame_idx % 150 == 0:\n", 483 | " humanPos, humanOrn = pybullet.getBasePositionAndOrientation(1)\n", 484 | " pybullet.resetDebugVisualizerCamera(distance, yaw, -20, humanPos) \n", 485 | " \n", 486 | " print(\"episode:\", i_episode, \"reward:\", total_reward, \"frames\", frame_idx)\n", 487 | "\n", 488 | "env.close()" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [] 504 | } 505 
| ], 506 | "metadata": { 507 | "kernelspec": { 508 | "display_name": "Python 3", 509 | "language": "python", 510 | "name": "python3" 511 | }, 512 | "language_info": { 513 | "codemirror_mode": { 514 | "name": "ipython", 515 | "version": 3 516 | }, 517 | "file_extension": ".py", 518 | "mimetype": "text/x-python", 519 | "name": "python", 520 | "nbconvert_exporter": "python", 521 | "pygments_lexer": "ipython3", 522 | "version": "3.8.8" 523 | } 524 | }, 525 | "nbformat": 4, 526 | "nbformat_minor": 2 527 | } 528 | -------------------------------------------------------------------------------- /OpenAI/HumanoidPyBulletEnv-v0/README.md: -------------------------------------------------------------------------------- 1 | # Humanoid Walker Problem 2 | 3 | ### Getting Started 4 | The environment to the Humanoid is described [here](https://github.com/benelot/pybullet-gym/blob/master/README.md). 5 | 6 | ### Solution Video 7 | [![HumanoidPyBulletEnv-v0](http://img.youtube.com/vi/dxZP1icxsMw/0.jpg)](https://www.youtube.com/watch?v=dxZP1icxsMw "BipedalWalker-v3") 8 | 9 | The video shows in the first part the behaviour of the untrained agent and then in comparison the behaviour of the trained agent. 10 | 11 | ### Solution Info 12 | My learning algorithm is a [Proximal Policy Optimization(PPO)]([https://arxiv.org/pdf/1707.06347.pdf]). 13 | 14 | ### Instructions 15 | 16 | start Jupyter Notebook `HumanoidPyBulletEnv-v0.ipynb` and follow the instructions. 17 | -------------------------------------------------------------------------------- /OpenAI/HumanoidPyBulletEnv-v0/multiprocessing_env.py: -------------------------------------------------------------------------------- 1 | #This code is from openai baseline 2 | #https://github.com/openai/baselines/tree/master/baselines/common/vec_env 3 | 4 | import numpy as np 5 | from multiprocessing import Process, Pipe 6 | import gym 7 | import pybullet 8 | import pybulletgym 9 | 10 | def worker(remote, parent_remote, env_fn_wrapper): 11 | parent_remote.close() 12 | env = env_fn_wrapper.x() 13 | while True: 14 | cmd, data = remote.recv() 15 | if cmd == 'step': 16 | ob, reward, done, info = env.step(data) 17 | if done: 18 | ob = env.reset() 19 | remote.send((ob, reward, done, info)) 20 | elif cmd == 'reset': 21 | ob = env.reset() 22 | remote.send(ob) 23 | elif cmd == 'reset_task': 24 | ob = env.reset_task() 25 | remote.send(ob) 26 | elif cmd == 'close': 27 | remote.close() 28 | break 29 | elif cmd == 'get_spaces': 30 | remote.send((env.observation_space, env.action_space)) 31 | else: 32 | raise NotImplementedError 33 | 34 | class VecEnv(object): 35 | """ 36 | An abstract asynchronous, vectorized environment. 37 | """ 38 | def __init__(self, num_envs, observation_space, action_space): 39 | self.num_envs = num_envs 40 | self.observation_space = observation_space 41 | self.action_space = action_space 42 | 43 | def reset(self): 44 | """ 45 | Reset all the environments and return an array of 46 | observations, or a tuple of observation arrays. 47 | If step_async is still doing work, that work will 48 | be cancelled and step_wait() should not be called 49 | until step_async() is invoked again. 50 | """ 51 | pass 52 | 53 | def step_async(self, actions): 54 | """ 55 | Tell all the environments to start taking a step 56 | with the given actions. 57 | Call step_wait() to get the results of the step. 58 | You should not call this if a step_async run is 59 | already pending. 60 | """ 61 | pass 62 | 63 | def step_wait(self): 64 | """ 65 | Wait for the step taken with step_async(). 
66 | Returns (obs, rews, dones, infos): 67 | - obs: an array of observations, or a tuple of 68 | arrays of observations. 69 | - rews: an array of rewards 70 | - dones: an array of "episode done" booleans 71 | - infos: a sequence of info objects 72 | """ 73 | pass 74 | 75 | def close(self): 76 | """ 77 | Clean up the environments' resources. 78 | """ 79 | pass 80 | 81 | def step(self, actions): 82 | self.step_async(actions) 83 | return self.step_wait() 84 | 85 | 86 | class CloudpickleWrapper(object): 87 | """ 88 | Uses cloudpickle to serialize contents (otherwise multiprocessing tries to use pickle) 89 | """ 90 | def __init__(self, x): 91 | self.x = x 92 | def __getstate__(self): 93 | import cloudpickle 94 | return cloudpickle.dumps(self.x) 95 | def __setstate__(self, ob): 96 | import pickle 97 | self.x = pickle.loads(ob) 98 | 99 | 100 | class SubprocVecEnv(VecEnv): 101 | def __init__(self, env_fns, spaces=None): 102 | """ 103 | envs: list of gym environments to run in subprocesses 104 | """ 105 | self.waiting = False 106 | self.closed = False 107 | nenvs = len(env_fns) 108 | self.nenvs = nenvs 109 | self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) 110 | self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) 111 | for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] 112 | for p in self.ps: 113 | p.daemon = True # if the main process crashes, we should not cause things to hang 114 | p.start() 115 | for remote in self.work_remotes: 116 | remote.close() 117 | 118 | self.remotes[0].send(('get_spaces', None)) 119 | observation_space, action_space = self.remotes[0].recv() 120 | VecEnv.__init__(self, len(env_fns), observation_space, action_space) 121 | 122 | def step_async(self, actions): 123 | for remote, action in zip(self.remotes, actions): 124 | remote.send(('step', action)) 125 | self.waiting = True 126 | 127 | def step_wait(self): 128 | results = [remote.recv() for remote in self.remotes] 129 | self.waiting = False 130 | obs, rews, dones, infos = zip(*results) 131 | return np.stack(obs), np.stack(rews), np.stack(dones), infos 132 | 133 | def reset(self): 134 | for remote in self.remotes: 135 | remote.send(('reset', None)) 136 | return np.stack([remote.recv() for remote in self.remotes]) 137 | 138 | def reset_idx(self, idx): 139 | self.remotes[idx].send(('reset', None)) 140 | return self.remotes[idx].recv() 141 | 142 | def reset_task(self): 143 | for remote in self.remotes: 144 | remote.send(('reset_task', None)) 145 | return np.stack([remote.recv() for remote in self.remotes]) 146 | 147 | def close(self): 148 | if self.closed: 149 | return 150 | if self.waiting: 151 | for remote in self.remotes: 152 | remote.recv() 153 | for remote in self.remotes: 154 | remote.send(('close', None)) 155 | for p in self.ps: 156 | p.join() 157 | self.closed = True 158 | 159 | def __len__(self): 160 | return self.nenvs -------------------------------------------------------------------------------- /OpenAI/HumanoidPyBulletEnv-v0/pretrained/actor_HumanoidPyBulletEnv-v0_checkpoint.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/HumanoidPyBulletEnv-v0/pretrained/actor_HumanoidPyBulletEnv-v0_checkpoint.pt -------------------------------------------------------------------------------- /OpenAI/HumanoidPyBulletEnv-v0/pretrained/actor_HumanoidPyBulletEnv-v0_final.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/HumanoidPyBulletEnv-v0/pretrained/actor_HumanoidPyBulletEnv-v0_final.pt -------------------------------------------------------------------------------- /OpenAI/HumanoidPyBulletEnv-v0/pretrained/critic_HumanoidPyBulletEnv-v0_checkpoint.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/HumanoidPyBulletEnv-v0/pretrained/critic_HumanoidPyBulletEnv-v0_checkpoint.pt -------------------------------------------------------------------------------- /OpenAI/HumanoidPyBulletEnv-v0/pretrained/critic_HumanoidPyBulletEnv-v0_final.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/HumanoidPyBulletEnv-v0/pretrained/critic_HumanoidPyBulletEnv-v0_final.pt -------------------------------------------------------------------------------- /OpenAI/LunarLander-v2/README.md: -------------------------------------------------------------------------------- 1 | # LunarLander Problem 2 | 3 | ### Getting Started 4 | The environment to the LunarLanderContinuous is described [here](https://gym.openai.com/envs/LunarLanderContinuous-v2/). 5 | 6 | ### Solution Video 7 | [![LunarLanderContinuous-v2](http://img.youtube.com/vi/615X49z3u6o/0.jpg)](https://www.youtube.com/watch?v=615X49z3u6o "LunarLanderContinuous-v2") 8 | 9 | The video shows in the first part the behaviour of the untrained agent and then in comparison the behaviour of the trained agent. 10 | 11 | ### Solution Info 12 | My learning algorithm is a Deep Deterministic Policy Gradient. 13 | 14 | DDPG is an actor-critic algorithm and primarily uses two neural networks. 15 | One for the actor and one for the critic. These networks calculate action vectors for the current state and and generate a temporal-difference error signal each time step. 16 | 17 | DDPG uses a stochastic behavioral policy for good exploration and a deterministic target policy for estimating. 18 | 19 | The current state is the input of the actuator network and the output is a single value representing the action. The deterministic policy gradient theorem provides the update rule for the weights of the actor network. 20 | 21 | The critic's output is simply the estimated Q-value of the current state and the action given by the actor. The critic network is updated from the gradients obtained from the TD error signal. 22 | 23 | More general information about DDPG in [this](https://arxiv.org/pdf/1509.02971.pdf) paper. 24 | 25 | ### Instructions 26 | 27 | start Jupyter Notebook `LunarLanderContinuous-v2 (DDPG).ipynb` and follow the instructions. 
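To make the update rules described above concrete, here is a condensed, self-contained sketch of a single DDPG learning step on a random mini-batch: the critic is regressed onto the TD target r + gamma * Q'(s', mu'(s')), the actor is improved by maximising Q(s, mu(s)) (the deterministic policy gradient), and the target networks slowly track the learned ones through a soft update. The small networks, dimensions and hyperparameter values below are placeholders for illustration; the agent actually used in this folder (`ddpg_agent.py`) additionally samples mini-batches from a replay buffer, adds Ornstein-Uhlenbeck exploration noise and clips the critic gradients.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

state_dim, action_dim, gamma, tau = 8, 2, 0.99, 1e-3

def mlp(in_dim, out_dim, tanh=False):
    layers = [nn.Linear(in_dim, 64), nn.ReLU(), nn.Linear(64, out_dim)]
    if tanh:
        layers.append(nn.Tanh())  # actions are bounded to [-1, 1]
    return nn.Sequential(*layers)

actor, actor_target = mlp(state_dim, action_dim, tanh=True), mlp(state_dim, action_dim, tanh=True)
critic, critic_target = mlp(state_dim + action_dim, 1), mlp(state_dim + action_dim, 1)
actor_target.load_state_dict(actor.state_dict())      # start the targets as exact copies
critic_target.load_state_dict(critic.state_dict())
actor_opt = torch.optim.Adam(actor.parameters(), lr=1e-4)
critic_opt = torch.optim.Adam(critic.parameters(), lr=1e-3)

# Random tensors stand in for a mini-batch sampled from the replay buffer.
states, actions = torch.randn(64, state_dim), torch.randn(64, action_dim)
rewards, dones = torch.randn(64, 1), torch.zeros(64, 1)
next_states = torch.randn(64, state_dim)

# Critic update: regress Q(s, a) onto the TD target r + gamma * Q'(s', mu'(s')).
with torch.no_grad():
    next_actions = actor_target(next_states)
    q_next = critic_target(torch.cat([next_states, next_actions], dim=1))
    q_target = rewards + gamma * q_next * (1 - dones)
critic_loss = F.mse_loss(critic(torch.cat([states, actions], dim=1)), q_target)
critic_opt.zero_grad()
critic_loss.backward()
critic_opt.step()

# Actor update: follow the deterministic policy gradient by maximising Q(s, mu(s)).
actor_loss = -critic(torch.cat([states, actor(states)], dim=1)).mean()
actor_opt.zero_grad()
actor_loss.backward()
actor_opt.step()

# Soft update: the targets slowly track the learned networks.
for target, local in ((actor_target, actor), (critic_target, critic)):
    for t_param, l_param in zip(target.parameters(), local.parameters()):
        t_param.data.copy_(tau * l_param.data + (1.0 - tau) * t_param.data)
```

The slow-moving targets with a small tau are what keep the TD target stable between updates, which is the same role the `TAU` constant plays in `ddpg_agent.py`.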
-------------------------------------------------------------------------------- /OpenAI/LunarLander-v2/checkpoint_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/LunarLander-v2/checkpoint_actor.pth -------------------------------------------------------------------------------- /OpenAI/LunarLander-v2/checkpoint_critic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/LunarLander-v2/checkpoint_critic.pth -------------------------------------------------------------------------------- /OpenAI/LunarLander-v2/ddpg_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import copy 4 | from collections import namedtuple, deque 5 | 6 | from model import Actor, Critic 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | 12 | BUFFER_SIZE = int(1e6) # replay buffer size 13 | BATCH_SIZE = 64 # minibatch size 14 | GAMMA = 0.99 # discount factor 15 | TAU = 1e-3 # for soft update of target parameters 16 | LR_ACTOR = 1e-4 # learning rate of the actor 17 | LR_CRITIC = 1e-3 # learning rate of the critic 18 | WEIGHT_DECAY = 0.0001 # L2 weight decay 19 | EPSILON = 1.0 20 | EPSILON_MIN = 0.1 21 | EPSILON_DECAY = 1e-6 22 | 23 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 24 | 25 | class Agent(): 26 | """Interacts with and learns from the environment.""" 27 | 28 | def __init__(self, state_size, action_size, random_seed): 29 | """Initialize an Agent object. 
30 | 31 | Params 32 | ====== 33 | state_size (int): dimension of each state 34 | action_size (int): dimension of each action 35 | random_seed (int): random seed 36 | """ 37 | self.state_size = state_size 38 | self.action_size = action_size 39 | self.seed = random.seed(random_seed) 40 | self.epsilon = EPSILON 41 | 42 | # Actor Network (w/ Target Network) 43 | self.actor_local = Actor(state_size, action_size, random_seed).to(device) 44 | self.actor_target = Actor(state_size, action_size, random_seed).to(device) 45 | self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR) 46 | 47 | # Critic Network (w/ Target Network) 48 | self.critic_local = Critic(state_size, action_size, random_seed).to(device) 49 | self.critic_target = Critic(state_size, action_size, random_seed).to(device) 50 | self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY) 51 | 52 | # Noise process 53 | self.noise = OUNoise(action_size, random_seed) 54 | 55 | # Replay memory 56 | self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed) 57 | 58 | # Make sure target is with the same weight as the source 59 | self.hard_update(self.actor_target, self.actor_local) 60 | self.hard_update(self.critic_target, self.critic_local) 61 | 62 | def step(self, state, action, reward, next_state, done, timestep): 63 | """Save experience in replay memory, and use random sample from buffer to learn.""" 64 | # Save experience / reward 65 | self.memory.add(state, action, reward, next_state, done) 66 | 67 | # Learn, if enough samples are available in memory 68 | if len(self.memory) > BATCH_SIZE and timestep % 20 == 0: 69 | for _ in range(10): 70 | experiences = self.memory.sample() 71 | self.learn(experiences, GAMMA) 72 | 73 | def act(self, state, add_noise=True): 74 | """Returns actions for given state as per current policy.""" 75 | 76 | state = torch.from_numpy(state).float().to(device) 77 | 78 | self.actor_local.eval() 79 | with torch.no_grad(): 80 | action = self.actor_local(state).cpu().data.numpy() 81 | self.actor_local.train() 82 | 83 | if add_noise: 84 | action += self.epsilon * self.noise.sample() 85 | 86 | return action 87 | 88 | def reset(self): 89 | self.noise.reset() 90 | 91 | def learn(self, experiences, gamma): 92 | """Update policy and value parameters using given batch of experience tuples. 93 | Q_targets = r + ? 
* critic_target(next_state, actor_target(next_state)) 94 | where: 95 | actor_target(state) -> action 96 | critic_target(state, action) -> Q-value 97 | 98 | Params 99 | ====== 100 | experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples 101 | gamma (float): discount factor 102 | """ 103 | states, actions, rewards, next_states, dones = experiences 104 | 105 | # ---------------------------- update critic ---------------------------- # 106 | # Get predicted next-state actions and Q values from target models 107 | actions_next = self.actor_target(next_states) 108 | Q_targets_next = self.critic_target(next_states, actions_next) 109 | 110 | # Compute Q targets for current states (y_i) 111 | Q_targets = rewards + (gamma * Q_targets_next * (1 - dones)) 112 | 113 | # Compute critic loss 114 | Q_expected = self.critic_local(states, actions) 115 | critic_loss = F.mse_loss(Q_expected, Q_targets) 116 | 117 | # Minimize the loss 118 | self.critic_optimizer.zero_grad() 119 | critic_loss.backward() 120 | torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1) 121 | self.critic_optimizer.step() 122 | 123 | # ---------------------------- update actor ---------------------------- # 124 | # Compute actor loss 125 | actions_pred = self.actor_local(states) 126 | actor_loss = -self.critic_local(states, actions_pred).mean() 127 | 128 | # Minimize the loss 129 | self.actor_optimizer.zero_grad() 130 | actor_loss.backward() 131 | self.actor_optimizer.step() 132 | 133 | # ----------------------- update target networks ----------------------- # 134 | self.soft_update(self.critic_local, self.critic_target, TAU) 135 | self.soft_update(self.actor_local, self.actor_target, TAU) 136 | 137 | # ---------------------------- update noise ---------------------------- # 138 | if self.epsilon - EPSILON_DECAY > EPSILON_MIN: 139 | self.epsilon -= EPSILON_DECAY 140 | 141 | self.noise.reset() 142 | 143 | def soft_update(self, local_model, target_model, tau): 144 | """Soft update model parameters. 
145 | ?_target = t*?_local + (1 - t)*?_target 146 | 147 | Params 148 | ====== 149 | local_model: PyTorch model (weights will be copied from) 150 | target_model: PyTorch model (weights will be copied to) 151 | tau (float): interpolation parameter 152 | """ 153 | for target_param, local_param in zip(target_model.parameters(), local_model.parameters()): 154 | target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data) 155 | 156 | def hard_update(self, target, source): 157 | for target_param, param in zip(target.parameters(), source.parameters()): 158 | target_param.data.copy_(param.data) 159 | 160 | class OUNoise: 161 | """Ornstein-Uhlenbeck process.""" 162 | 163 | def __init__(self, size, seed, mu=0., theta=0.15, sigma=0.3): 164 | """Initialize parameters and noise process.""" 165 | self.mu = mu * np.ones(size) 166 | self.theta = theta 167 | self.sigma = sigma 168 | self.seed = random.seed(seed) 169 | self.reset() 170 | 171 | def reset(self): 172 | """Reset the internal state (= noise) to mean (mu).""" 173 | self.state = copy.copy(self.mu) 174 | 175 | def sample(self): 176 | """Update internal state and return it as a noise sample.""" 177 | x = self.state 178 | dx = self.theta * (self.mu - x) + self.sigma * np.array([random.random() for i in range(len(x))]) 179 | self.state = x + dx 180 | return self.state 181 | 182 | class ReplayBuffer: 183 | """Fixed-size buffer to store experience tuples.""" 184 | 185 | def __init__(self, action_size, buffer_size, batch_size, seed): 186 | """Initialize a ReplayBuffer object. 187 | Params 188 | ====== 189 | buffer_size (int): maximum size of buffer 190 | batch_size (int): size of each training batch 191 | """ 192 | self.action_size = action_size 193 | self.memory = deque(maxlen=buffer_size) # internal memory (deque) 194 | self.batch_size = batch_size 195 | self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) 196 | self.seed = random.seed(seed) 197 | 198 | def add(self, state, action, reward, next_state, done): 199 | """Add a new experience to memory.""" 200 | e = self.experience(state, action, reward, next_state, done) 201 | self.memory.append(e) 202 | 203 | def sample(self): 204 | """Randomly sample a batch of experiences from memory.""" 205 | experiences = random.sample(self.memory, k=self.batch_size) 206 | 207 | states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device) 208 | actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(device) 209 | rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device) 210 | next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device) 211 | dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device) 212 | 213 | return (states, actions, rewards, next_states, dones) 214 | 215 | def __len__(self): 216 | """Return the current size of internal memory.""" 217 | return len(self.memory) -------------------------------------------------------------------------------- /OpenAI/LunarLander-v2/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | def hidden_init(layer): 8 | fan_in = layer.weight.data.size()[0] 9 | lim = 1. 
/ np.sqrt(fan_in) 10 | return (-lim, lim) 11 | 12 | class Actor(nn.Module): 13 | """Actor (Policy) Model.""" 14 | 15 | def __init__(self, state_size, action_size, seed, fc1_units=128, fc2_units=64): 16 | """Initialize parameters and build model. 17 | Params 18 | ====== 19 | state_size (int): Dimension of each state 20 | action_size (int): Dimension of each action 21 | seed (int): Random seed 22 | fc1_units (int): Number of nodes in first hidden layer 23 | fc2_units (int): Number of nodes in second hidden layer 24 | """ 25 | super(Actor, self).__init__() 26 | self.seed = torch.manual_seed(seed) 27 | 28 | self.fc1 = nn.Linear(state_size, fc1_units) 29 | self.fc2 = nn.Linear(fc1_units, fc2_units) 30 | self.fc3 = nn.Linear(fc2_units, action_size) 31 | self.reset_parameters() 32 | 33 | def reset_parameters(self): 34 | self.fc1.weight.data.uniform_(*hidden_init(self.fc1)) 35 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2)) 36 | self.fc3.weight.data.uniform_(-3e-3, 3e-3) 37 | 38 | def forward(self, state): 39 | """Build an actor (policy) network that maps states -> actions.""" 40 | x = state 41 | x = F.relu(self.fc1(x)) 42 | x = F.relu(self.fc2(x)) 43 | return torch.tanh(self.fc3(x)) 44 | 45 | class Critic(nn.Module): 46 | """Critic (Value) Model.""" 47 | 48 | def __init__(self, state_size, action_size, seed, fcs1_units=128, fc2_units=64): 49 | """Initialize parameters and build model. 50 | Params 51 | ====== 52 | state_size (int): Dimension of each state 53 | action_size (int): Dimension of each action 54 | seed (int): Random seed 55 | fcs1_units (int): Number of nodes in the first hidden layer 56 | fc2_units (int): Number of nodes in the second hidden layer 57 | fc3_units (int): Number of nodes in the third hidden layer 58 | """ 59 | super(Critic, self).__init__() 60 | self.seed = torch.manual_seed(seed) 61 | self.bn0 = nn.BatchNorm1d(state_size) 62 | self.fcs1 = nn.Linear(state_size, fcs1_units) 63 | self.fc2 = nn.Linear(fcs1_units+action_size, fc2_units) 64 | self.fc3 = nn.Linear(fc2_units, 1) 65 | self.reset_parameters() 66 | 67 | def reset_parameters(self): 68 | self.fcs1.weight.data.uniform_(*hidden_init(self.fcs1)) 69 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2)) 70 | self.fc3.weight.data.uniform_(-3e-3, 3e-3) 71 | 72 | def forward(self, state, action): 73 | """Build a critic (value) network that maps (state, action) pairs -> Q-values.""" 74 | state = self.bn0(state) 75 | xs = F.relu(self.fcs1(state)) 76 | x = torch.cat((xs, action), dim=1) 77 | x = F.relu(self.fc2(x)) 78 | return self.fc3(x) -------------------------------------------------------------------------------- /OpenAI/MountainCarContinuous-v0/Agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import copy 4 | from collections import namedtuple, deque 5 | 6 | from Model import Actor, Critic 7 | from Noise import OUNoise 8 | 9 | import torch 10 | import torch.nn.functional as F 11 | import torch.optim as optim 12 | 13 | BUFFER_SIZE = int(1e6) # replay buffer size 14 | BATCH_SIZE = 64 # minibatch size 15 | GAMMA = 0.99 # discount factor 16 | TAU = 1e-3 # for soft update of target parameters 17 | LR_ACTOR = 1e-2 # learning rate of the actor 18 | LR_CRITIC = 5e-3 # learning rate of the critic 19 | WEIGHT_DECAY = 0 # L2 weight decay 20 | EPSILON_MAX = 1.0 21 | EPSILON_MIN = 0.1 22 | EPSILON_DECAY = 1e-6 23 | LEARN_START = 20000 24 | UPDATE_EVERY = 1 25 | UPDATES_PER_STEP = 1 26 | 27 | device = torch.device("cuda:0" if 
torch.cuda.is_available() else "cpu") 28 | 29 | class Agent(): 30 | """Interacts with and learns from the environment.""" 31 | 32 | def __init__(self, state_size, action_size, random_seed): 33 | """Initialize an Agent object. 34 | 35 | Params 36 | ====== 37 | state_size (int): dimension of each state 38 | action_size (int): dimension of each action 39 | random_seed (int): random seed 40 | """ 41 | self.state_size = state_size 42 | self.action_size = action_size 43 | self.seed = random.seed(random_seed) 44 | self.epsilon = EPSILON_MAX 45 | 46 | # Actor Network (w/ Target Network) 47 | self.actor_local = Actor(state_size, action_size, random_seed).to(device) 48 | self.actor_target = Actor(state_size, action_size, random_seed).to(device) 49 | self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR) 50 | 51 | # Critic Network (w/ Target Network) 52 | self.critic_local = Critic(state_size, action_size, random_seed).to(device) 53 | self.critic_target = Critic(state_size, action_size, random_seed).to(device) 54 | self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY) 55 | 56 | # Noise process 57 | self.noise = OUNoise(action_size, random_seed, mu=0, theta=0.15, sigma=0.2) 58 | 59 | # Replay memory 60 | self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed) 61 | 62 | # Make sure target is with the same weight as the source 63 | self.hard_update(self.actor_target, self.actor_local) 64 | self.hard_update(self.critic_target, self.critic_local) 65 | 66 | self.t_step = 0 67 | 68 | def step(self, state, action, reward, next_state, done, timestep): 69 | """Save experience in replay memory, and use random sample from buffer to learn.""" 70 | # Save experience / reward 71 | self.memory.add(state, action, reward, next_state, done) 72 | 73 | if len(self.memory) > LEARN_START: 74 | # Learn every UPDATE_EVERY time steps. 75 | self.t_step = (self.t_step + 1) % UPDATE_EVERY 76 | if self.t_step == 0: 77 | # Learn, if enough samples are available in memory 78 | if len(self.memory) > BATCH_SIZE: 79 | for _ in range(UPDATES_PER_STEP): 80 | experiences = self.memory.sample() 81 | self.learn(experiences, GAMMA) 82 | 83 | def act(self, state, add_noise=True): 84 | """Returns actions for given state as per current policy.""" 85 | 86 | state = torch.from_numpy(state).float().to(device) 87 | 88 | self.actor_local.eval() 89 | with torch.no_grad(): 90 | action = self.actor_local(state).cpu().data.numpy() 91 | 92 | self.actor_local.train() 93 | 94 | if add_noise: 95 | action += self.epsilon * self.noise.sample() 96 | 97 | return np.clip(action, -1, 1) 98 | 99 | def reset(self): 100 | self.noise.reset() 101 | 102 | def learn(self, experiences, gamma): 103 | """Update policy and value parameters using given batch of experience tuples. 104 | Q_targets = r + ? 
* critic_target(next_state, actor_target(next_state)) 105 | where: 106 | actor_target(state) -> action 107 | critic_target(state, action) -> Q-value 108 | 109 | Params 110 | ====== 111 | experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples 112 | gamma (float): discount factor 113 | """ 114 | states, actions, rewards, next_states, dones = experiences 115 | 116 | # ---------------------------- update critic ---------------------------- # 117 | # Get predicted next-state actions and Q values from target models 118 | actions_next = self.actor_target(next_states) 119 | Q_targets_next = self.critic_target(next_states, actions_next) 120 | 121 | # Compute Q targets for current states (y_i) 122 | Q_targets = rewards + (gamma * Q_targets_next * (1 - dones)) 123 | 124 | # Compute critic loss 125 | Q_expected = self.critic_local(states, actions) 126 | critic_loss = F.mse_loss(Q_expected, Q_targets) 127 | 128 | # Minimize the loss 129 | self.critic_optimizer.zero_grad() 130 | critic_loss.backward() 131 | #torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1) 132 | self.critic_optimizer.step() 133 | 134 | # ---------------------------- update actor ---------------------------- # 135 | # Compute actor loss 136 | actions_pred = self.actor_local(states) 137 | actor_loss = -self.critic_local(states, actions_pred).mean() 138 | 139 | # Minimize the loss 140 | self.actor_optimizer.zero_grad() 141 | actor_loss.backward() 142 | self.actor_optimizer.step() 143 | 144 | # ----------------------- update target networks ----------------------- # 145 | self.soft_update(self.critic_local, self.critic_target, TAU) 146 | self.soft_update(self.actor_local, self.actor_target, TAU) 147 | 148 | # ---------------------------- update noise ---------------------------- # 149 | if self.epsilon - EPSILON_DECAY > EPSILON_MIN: 150 | self.epsilon -= EPSILON_DECAY 151 | else: 152 | self.epsilon = EPSILON_MIN 153 | 154 | self.noise.reset() 155 | 156 | def soft_update(self, local_model, target_model, tau): 157 | """Soft update model parameters. 158 | ?_target = t*?_local + (1 - t)*?_target 159 | 160 | Params 161 | ====== 162 | local_model: PyTorch model (weights will be copied from) 163 | target_model: PyTorch model (weights will be copied to) 164 | tau (float): interpolation parameter 165 | """ 166 | for target_param, local_param in zip(target_model.parameters(), local_model.parameters()): 167 | target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data) 168 | 169 | def hard_update(self, target, source): 170 | for target_param, param in zip(target.parameters(), source.parameters()): 171 | target_param.data.copy_(param.data) 172 | 173 | class ReplayBuffer: 174 | """Fixed-size buffer to store experience tuples.""" 175 | 176 | def __init__(self, action_size, buffer_size, batch_size, seed): 177 | """Initialize a ReplayBuffer object. 
178 | Params 179 | ====== 180 | buffer_size (int): maximum size of buffer 181 | batch_size (int): size of each training batch 182 | """ 183 | self.action_size = action_size 184 | self.buffer_size = buffer_size 185 | self.batch_size = batch_size 186 | self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) 187 | self.seed = random.seed(seed) 188 | 189 | self.reset() 190 | 191 | def add(self, state, action, reward, next_state, done): 192 | """Add a new experience to memory.""" 193 | e = self.experience(state, action, reward, next_state, done) 194 | self.memory.append(e) 195 | 196 | def reset(self): 197 | self.memory = deque(maxlen=self.buffer_size) 198 | 199 | def sample(self): 200 | """Randomly sample a batch of experiences from memory.""" 201 | experiences = random.sample(self.memory, k=self.batch_size) 202 | 203 | states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device) 204 | actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(device) 205 | rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device) 206 | next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device) 207 | dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device) 208 | 209 | return states, actions, rewards, next_states, dones 210 | 211 | def __len__(self): 212 | """Return the current size of internal memory.""" 213 | return len(self.memory) -------------------------------------------------------------------------------- /OpenAI/MountainCarContinuous-v0/Model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | def hidden_init(layer): 8 | fan_in = layer.weight.data.size()[0] 9 | lim = 1. / np.sqrt(fan_in) 10 | return (-lim, lim) 11 | 12 | 13 | class Actor(nn.Module): 14 | """Actor (Policy) Model.""" 15 | 16 | def __init__(self, state_size, action_size, seed, fc1_units=5, fc2_units=5): 17 | """Initialize parameters and build model. 
18 | Params 19 | ====== 20 | state_size (int): Dimension of each state 21 | action_size (int): Dimension of each action 22 | seed (int): Random seed 23 | fc1_units (int): Number of nodes in first hidden layer 24 | fc2_units (int): Number of nodes in second hidden layer 25 | """ 26 | super(Actor, self).__init__() 27 | self.seed = torch.manual_seed(seed) 28 | 29 | self.fc1 = nn.Linear(state_size, fc1_units) 30 | self.ln1 = nn.LayerNorm(fc1_units) 31 | self.fc2 = nn.Linear(fc1_units, fc2_units) 32 | self.ln2 = nn.LayerNorm(fc2_units) 33 | self.fc3 = nn.Linear(fc2_units, action_size) 34 | self.reset_parameters() 35 | 36 | def reset_parameters(self): 37 | self.fc1.weight.data.uniform_(*hidden_init(self.fc1)) 38 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2)) 39 | self.fc3.weight.data.uniform_(-3e-3, 3e-3) 40 | 41 | def forward(self, state): 42 | """Build an actor (policy) network that maps states -> actions.""" 43 | x = state 44 | x = self.fc1(x) 45 | x = self.ln1(x) 46 | x = F.relu(x) 47 | x = self.fc2(x) 48 | x = self.ln2(x) 49 | x = F.relu(x) 50 | x = self.fc3(x) 51 | return torch.tanh(x) 52 | 53 | 54 | class Critic(nn.Module): 55 | """Critic (Value) Model.""" 56 | 57 | def __init__(self, state_size, action_size, seed, fc1_units=20, fc2_units=10): 58 | """Initialize parameters and build model. 59 | Params 60 | ====== 61 | state_size (int): Dimension of each state 62 | action_size (int): Dimension of each action 63 | seed (int): Random seed 64 | fcs1_units (int): Number of nodes in the first hidden layer 65 | fc2_units (int): Number of nodes in the second hidden layer 66 | fc3_units (int): Number of nodes in the third hidden layer 67 | """ 68 | super(Critic, self).__init__() 69 | self.seed = torch.manual_seed(seed) 70 | self.fc1 = nn.Linear(state_size, fc1_units) 71 | self.bn1 = nn.BatchNorm1d(fc1_units) 72 | self.fc2 = nn.Linear(fc1_units+action_size, fc2_units) 73 | self.fc3 = nn.Linear(fc2_units, 1) 74 | self.reset_parameters() 75 | 76 | def reset_parameters(self): 77 | self.fc1.weight.data.uniform_(*hidden_init(self.fc1)) 78 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2)) 79 | self.fc3.weight.data.uniform_(-3e-3, 3e-3) 80 | 81 | def forward(self, state, action): 82 | """Build a critic (value) network that maps (state, action) pairs -> Q-values.""" 83 | xs = self.fc1(state) 84 | xs = self.bn1(xs) 85 | xs = F.leaky_relu(xs) 86 | x = torch.cat((xs, action), dim=1) 87 | x = self.fc2(x) 88 | x = F.leaky_relu(x) 89 | return self.fc3(x) 90 | -------------------------------------------------------------------------------- /OpenAI/MountainCarContinuous-v0/Noise.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import copy 4 | 5 | class OUNoise: 6 | """Ornstein-Uhlenbeck process.""" 7 | 8 | def __init__(self, size, seed, mu=0., theta=0.15, sigma=0.2): 9 | """Initialize parameters and noise process.""" 10 | self.mu = mu * np.ones(size) 11 | self.theta = theta 12 | self.sigma = sigma 13 | self.seed = random.seed(seed) 14 | self.reset() 15 | 16 | def reset(self): 17 | """Reset the internal state (= noise) to mean (mu).""" 18 | self.state = copy.copy(self.mu) 19 | 20 | def sample(self): 21 | """Update internal state and return it as a noise sample.""" 22 | x = self.state 23 | dx = self.theta * (self.mu - x) + self.sigma * np.array([random.random() for i in range(len(x))]) 24 | self.state = x + dx 25 | return self.state 26 | -------------------------------------------------------------------------------- 
/OpenAI/MountainCarContinuous-v0/README.md: -------------------------------------------------------------------------------- 1 | # MountainCarContinuous Problem 2 | 3 | ### Getting Started 4 | The environment to the MountainCarContinuous is described [here](https://github.com/openai/gym/wiki/MountainCarContinuous-v0). 5 | 6 | ### Solution Video 7 | [![LunarLanderContinuous-v2](http://img.youtube.com/vi/RGKRfxfEFEA/0.jpg)](https://www.youtube.com/watch?v=RGKRfxfEFEA "MountainCarContinuous-v0") 8 | 9 | The video shows the solution of the environment after 32 episodes. 10 | 11 | ### Solution Info 12 | My learning algorithm is a Deep Deterministic Policy Gradient. 13 | 14 | DDPG is an actor-critic algorithm and primarily uses two neural networks. 15 | One for the actor and one for the critic. These networks calculate action vectors for the current state and and generate a temporal-difference error signal each time step. 16 | 17 | DDPG uses a stochastic behavioral policy for good exploration and a deterministic target policy for estimating. 18 | 19 | The current state is the input of the actuator network and the output is a single value representing the action. The deterministic policy gradient theorem provides the update rule for the weights of the actor network. 20 | 21 | The critic's output is simply the estimated Q-value of the current state and the action given by the actor. The critic network is updated from the gradients obtained from the TD error signal. 22 | 23 | More general information about DDPG in [this](https://arxiv.org/pdf/1509.02971.pdf) paper. 24 | 25 | ### Instructions 26 | 27 | start Jupyter Notebook `MountainCarContinuous-v0 (DDPG).ipynb` and follow the instructions. -------------------------------------------------------------------------------- /OpenAI/MountainCarContinuous-v0/checkpoint_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/MountainCarContinuous-v0/checkpoint_actor.pth -------------------------------------------------------------------------------- /OpenAI/MountainCarContinuous-v0/checkpoint_critic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/MountainCarContinuous-v0/checkpoint_critic.pth -------------------------------------------------------------------------------- /OpenAI/Taxi-v2/README.md: -------------------------------------------------------------------------------- 1 | # Taxi Problem 2 | 3 | ### Getting Started 4 | 5 | Read the description of the environment in subsection 3.1 of [this paper](https://arxiv.org/pdf/cs/9905014.pdf). You can verify that the description in the paper matches the OpenAI Gym environment by peeking at the code [here](https://github.com/openai/gym/blob/master/gym/envs/toy_text/taxi.py). 6 | 7 | 8 | ### Instructions 9 | 10 | The repository contains three files: 11 | - `agent.py`: Develop your reinforcement learning agent here. This is the only file that you should modify. 12 | - `monitor.py`: The `interact` function tests how well your agent learns from interaction with the environment. 13 | - `main.py`: Run this file in the terminal to check the performance of your agent. 
14 | 15 | Begin by running the following command in the terminal: 16 | ``` 17 | python main.py 18 | ``` 19 | 20 | When you run `main.py`, the agent that you specify in `agent.py` interacts with the environment for 20,000 episodes. The details of the interaction are specified in `monitor.py`, which returns two variables: `avg_rewards` and `best_avg_reward`. 21 | - `avg_rewards` is a deque where `avg_rewards[i]` is the average (undiscounted) return collected by the agent from episodes `i+1` to episode `i+100`, inclusive. So, for instance, `avg_rewards[0]` is the average return collected by the agent over the first 100 episodes. 22 | - `best_avg_reward` is the largest entry in `avg_rewards`. This is the final score that you should use when determining how well your agent performed in the task. 23 | 24 | Your assignment is to modify the `agents.py` file to improve the agent's performance. 25 | - Use the `__init__()` method to define any needed instance variables. Currently, we define the number of actions available to the agent (`nA`) and initialize the action values (`Q`) to an empty dictionary of arrays. Feel free to add more instance variables; for example, you may find it useful to define the value of epsilon if the agent uses an epsilon-greedy policy for selecting actions. 26 | - The `select_action()` method accepts the environment state as input and returns the agent's choice of action. The default code that we have provided randomly selects an action. 27 | - The `step()` method accepts a (`state`, `action`, `reward`, `next_state`) tuple as input, along with the `done` variable, which is `True` if the episode has ended. The default code (which you should certainly change!) increments the action value of the previous state-action pair by 1. You should change this method to use the sampled tuple of experience to update the agent's knowledge of the problem. 28 | 29 | Once you have modified the function, you need only run `python main.py` to test your new agent. 30 | 31 | OpenAI Gym [defines "solving"](https://gym.openai.com/envs/Taxi-v1/) this task as getting average return of 9.7 over 100 consecutive trials. 32 | -------------------------------------------------------------------------------- /OpenAI/Taxi-v2/agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | 4 | class Agent: 5 | 6 | def __init__(self, nA=6): 7 | """ Initialize agent. 8 | 9 | Params 10 | ====== 11 | - nA: number of actions available to the agent 12 | """ 13 | self.i_episode = 1 14 | self.alpha = .01 15 | self.gamma = 1.0 16 | self.nA = nA 17 | self.Q = defaultdict(lambda: np.zeros(self.nA)) 18 | 19 | def epsilon_greedy_probs(self, Q_s, eps=None): 20 | """ obtains the action probabilities corresponding to epsilon-greedy policy """ 21 | epsilon = 1.0 / self.i_episode 22 | if eps is not None: 23 | epsilon = eps 24 | policy_s = np.ones(self.nA) * epsilon / self.nA 25 | policy_s[np.argmax(Q_s)] = 1 - epsilon + (epsilon / self.nA) 26 | return policy_s 27 | 28 | def update_Q(self, Qsa, Qsa_next, reward, alpha, gamma): 29 | """ updates the action-value function estimate using the most recent time step """ 30 | return Qsa + (alpha * (reward + (gamma * Qsa_next) - Qsa)) 31 | 32 | def select_action(self, state): 33 | """ Given the state, select an action. 
34 | 35 | Params 36 | ====== 37 | - state: the current state of the environment 38 | 39 | Returns 40 | ======= 41 | - action: an integer, compatible with the task's action space 42 | """ 43 | # get epsilon-greedy action probabilities 44 | policy_s = self.epsilon_greedy_probs(self.Q[state]) 45 | 46 | # pick next action A 47 | return np.random.choice(np.arange(self.nA), p=policy_s) 48 | 49 | def step(self, state, action, reward, next_state, done): 50 | """ Update the agent's knowledge, using the most recently sampled tuple. 51 | 52 | Params 53 | ====== 54 | - state: the previous state of the environment 55 | - action: the agent's previous choice of action 56 | - reward: last reward received 57 | - next_state: the current state of the environment 58 | - done: whether the episode is complete (True or False) 59 | """ 60 | # update Q 61 | self.Q[state][action] = self.update_Q(self.Q[state][action], np.max(self.Q[next_state]), reward, self.alpha, 62 | self.gamma) 63 | 64 | self.i_episode += 1 65 | -------------------------------------------------------------------------------- /OpenAI/Taxi-v2/main.py: -------------------------------------------------------------------------------- 1 | from agent import Agent 2 | from monitor import interact 3 | import gym 4 | import numpy as np 5 | 6 | env = gym.make('Taxi-v2') 7 | agent = Agent() 8 | avg_rewards, best_avg_reward = interact(env, agent) -------------------------------------------------------------------------------- /OpenAI/Taxi-v2/monitor.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import sys 3 | import math 4 | import numpy as np 5 | 6 | def interact(env, agent, num_episodes=20000, window=100): 7 | """ Monitor agent's performance. 
8 | 9 | Params 10 | ====== 11 | - env: instance of OpenAI Gym's Taxi-v1 environment 12 | - agent: instance of class Agent (see Agent.py for details) 13 | - num_episodes: number of episodes of agent-environment interaction 14 | - window: number of episodes to consider when calculating average rewards 15 | 16 | Returns 17 | ======= 18 | - avg_rewards: deque containing average rewards 19 | - best_avg_reward: largest value in the avg_rewards deque 20 | """ 21 | # initialize average rewards 22 | avg_rewards = deque(maxlen=num_episodes) 23 | # initialize best average reward 24 | best_avg_reward = -math.inf 25 | # initialize monitor for most recent rewards 26 | samp_rewards = deque(maxlen=window) 27 | # for each episode 28 | for i_episode in range(1, num_episodes+1): 29 | # begin the episode 30 | state = env.reset() 31 | # initialize the sampled reward 32 | samp_reward = 0 33 | while True: 34 | # agent selects an action 35 | action = agent.select_action(state) 36 | # agent performs the selected action 37 | next_state, reward, done, _ = env.step(action) 38 | # agent performs internal updates based on sampled experience 39 | agent.step(state, action, reward, next_state, done) 40 | # update the sampled reward 41 | samp_reward += reward 42 | # update the state (s <- s') to next time step 43 | state = next_state 44 | if done: 45 | # save final sampled reward 46 | samp_rewards.append(samp_reward) 47 | break 48 | if (i_episode >= 100): 49 | # get average reward from last 100 episodes 50 | avg_reward = np.mean(samp_rewards) 51 | # append to deque 52 | avg_rewards.append(avg_reward) 53 | # update best average reward 54 | if avg_reward > best_avg_reward: 55 | best_avg_reward = avg_reward 56 | # monitor progress 57 | print("\rEpisode {}/{} || Best average reward {}".format(i_episode, num_episodes, best_avg_reward), end="") 58 | sys.stdout.flush() 59 | # check if task is solved (according to OpenAI Gym) 60 | if best_avg_reward >= 9.7: 61 | print('\nEnvironment solved in {} episodes.'.format(i_episode), end="") 62 | break 63 | if i_episode == num_episodes: print('\n') 64 | return avg_rewards, best_avg_reward -------------------------------------------------------------------------------- /OpenAI/Taxi-v3/Reinforcement Learning.ppsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/OpenAI/Taxi-v3/Reinforcement Learning.ppsx -------------------------------------------------------------------------------- /OpenAI/Taxi-v3/Taxi-v3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "commercial-proportion", 6 | "metadata": {}, 7 | "source": [ 8 | "#### Import dependencies" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "favorite-cathedral", 15 | "metadata": { 16 | "scrolled": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import gym\n", 21 | "import random\n", 22 | "import numpy as np\n", 23 | "import time\n", 24 | "from IPython import display\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "from collections import defaultdict\n", 27 | "import pylab as pl\n", 28 | "\n", 29 | "%matplotlib inline" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "reduced-prime", 35 | "metadata": {}, 36 | "source": [ 37 | "### Load Environment" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "id": 
"three-flood", 44 | "metadata": { 45 | "scrolled": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "env = gym.make(\"Taxi-v3\")" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "id": "stupid-thailand", 55 | "metadata": {}, 56 | "source": [ 57 | "### Inspect Environment" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "id": "integral-sharing", 64 | "metadata": { 65 | "scrolled": true 66 | }, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "Action size 6\n", 73 | "State size 500\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "# There are 6 discrete deterministic actions:\n", 79 | "# - 0: move south\n", 80 | "# - 1: move north\n", 81 | "# - 2: move east\n", 82 | "# - 3: move west\n", 83 | "# - 4: pickup passenger\n", 84 | "# - 5: drop off passenger\n", 85 | "\n", 86 | "action_size = env.action_space.n\n", 87 | "print(\"Action size \", action_size)\n", 88 | "\n", 89 | "# There are 500 discrete states since there are 25 taxi positions\n", 90 | "# 5 possible locations of the passenger (including the case when the passenger is in the taxi)\n", 91 | "# and 4 destination locations.\n", 92 | "# Start-Position is random\n", 93 | "state_size = env.observation_space.n\n", 94 | "print(\"State size \", state_size)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 4, 100 | "id": "rocky-seventh", 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "+---------+\n", 108 | "|\u001b[35mR\u001b[0m: | : :G|\n", 109 | "| : | : : |\n", 110 | "|\u001b[43m \u001b[0m: : : : |\n", 111 | "| | : | : |\n", 112 | "|Y| : |\u001b[34;1mB\u001b[0m: |\n", 113 | "+---------+\n", 114 | "\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "env.reset()\n", 120 | "env.render()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "measured-invalid", 126 | "metadata": {}, 127 | "source": [ 128 | "### Agent" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 5, 134 | "id": "alternate-greek", 135 | "metadata": { 136 | "scrolled": true 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "class Agent():\n", 141 | " def __init__(self, n_actions, n_states, gamma=0.9):\n", 142 | " self.n_actions = n_actions\n", 143 | " \n", 144 | " self.gamma = gamma\n", 145 | " self.Q = np.zeros((n_states, n_actions))\n", 146 | " \n", 147 | " def decay_schedule(self, init_value, min_value, decay_ratio, max_steps, log_start=-2, log_base=10):\n", 148 | " decay_steps = int(max_steps * decay_ratio)\n", 149 | " rem_steps = max_steps - decay_steps\n", 150 | " values = np.logspace(log_start, 0, decay_steps, base=log_base, endpoint=True)[::-1]\n", 151 | " values = (values - values.min()) / (values.max() - values.min())\n", 152 | " values = (init_value - min_value) * values + min_value\n", 153 | " values = np.pad(values, (0, rem_steps), 'edge')\n", 154 | " return values \n", 155 | " \n", 156 | " def act(self, state, eps=0):\n", 157 | " if random.uniform(0, 1) < eps:\n", 158 | " return random.choice(np.arange(self.n_actions)) \n", 159 | " else:\n", 160 | " return np.argmax(self.Q[state])\n", 161 | " \n", 162 | " def learn(self, state, action, reward, next_state, done, alpha, algo='qlearn'): \n", 163 | " if algo == 'qlearn': \n", 164 | " # Q-Learning\n", 165 | " td_target = reward + self.gamma * np.max(self.Q[next_state, :]) * (not done)\n", 166 | " \n", 167 | " else: \n", 168 | " # SARSA\n", 169 | " td_target = reward + self.gamma 
* self.Q[next_state, self.act(next_state)] * (not done)\n", 170 | " \n", 171 | " td_error = td_target - self.Q[state, action] \n", 172 | " \n", 173 | " self.Q[state, action] = self.Q[state, action] + alpha * td_error" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "id": "english-label", 179 | "metadata": {}, 180 | "source": [ 181 | "### Q - Learning" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 6, 187 | "id": "brilliant-scenario", 188 | "metadata": { 189 | "scrolled": true 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "def learning(n_actions, n_states, episodes=50000, max_steps=500, print_every=5000):\n", 194 | " agent = Agent(n_actions, n_states)\n", 195 | " \n", 196 | " alphas = agent.decay_schedule(0.9, 0.01, 0.2, episodes)\n", 197 | " epsilons = agent.decay_schedule(1.0, 0.01, 0.5, episodes)\n", 198 | " \n", 199 | " for n_episode in range(episodes):\n", 200 | " state = env.reset() \n", 201 | " \n", 202 | " for n_step in range(max_steps):\n", 203 | " action = agent.act(state, epsilons[n_episode])\n", 204 | " next_state, reward, done, info = env.step(action) \n", 205 | " \n", 206 | " agent.learn(state, action, reward, next_state, done, alphas[n_episode])\n", 207 | " \n", 208 | " state = next_state\n", 209 | " \n", 210 | " if done: \n", 211 | " break\n", 212 | " \n", 213 | " if n_episode % print_every == 1:\n", 214 | " print('Episode: {0} done after {1} Steps.'.format(n_episode+1, n_step))\n", 215 | " \n", 216 | " print('Done.')\n", 217 | " env.close()\n", 218 | " \n", 219 | " return agent" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "historic-charger", 225 | "metadata": {}, 226 | "source": [ 227 | "#### Training" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 7, 233 | "id": "reasonable-dimension", 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | "Episode: 2 done after 199 Steps.\n", 241 | "Episode: 5002 done after 21 Steps.\n", 242 | "Episode: 10002 done after 12 Steps.\n", 243 | "Episode: 15002 done after 17 Steps.\n", 244 | "Episode: 20002 done after 15 Steps.\n", 245 | "Episode: 25002 done after 15 Steps.\n", 246 | "Episode: 30002 done after 11 Steps.\n", 247 | "Episode: 35002 done after 9 Steps.\n", 248 | "Episode: 40002 done after 11 Steps.\n", 249 | "Episode: 45002 done after 13 Steps.\n", 250 | "Done.\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "agent = learning(action_size, state_size)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "id": "thirty-truck", 261 | "metadata": {}, 262 | "source": [ 263 | "### Replay trained Agent" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 8, 269 | "id": "closed-sport", 270 | "metadata": { 271 | "scrolled": true 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "def replay(agent, max_steps=20): \n", 276 | " n_steps = 0\n", 277 | "\n", 278 | " state, done = env.reset(), False\n", 279 | " rewards = 0\n", 280 | "\n", 281 | " while not done and n_steps < max_steps:\n", 282 | " action = agent.act(state)\n", 283 | " next_state, reward, done, info = env.step(action) \n", 284 | " \n", 285 | " state = next_state\n", 286 | " rewards += reward\n", 287 | " \n", 288 | " display.clear_output(wait=True)\n", 289 | " env.render()\n", 290 | " time.sleep(.5)\n", 291 | "\n", 292 | " n_steps+=1\n", 293 | "\n", 294 | " print('Solved after {0} Steps.'.format(n_steps))" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | 
"execution_count": 9, 300 | "id": "african-output", 301 | "metadata": { 302 | "scrolled": false 303 | }, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "+---------+\n", 310 | "|R: | : :G|\n", 311 | "| : | : : |\n", 312 | "| : : : : |\n", 313 | "| | : | : |\n", 314 | "|\u001b[35m\u001b[34;1m\u001b[43mY\u001b[0m\u001b[0m\u001b[0m| : |B: |\n", 315 | "+---------+\n", 316 | " (Dropoff)\n", 317 | "Solved after 12 Steps.\n" 318 | ] 319 | } 320 | ], 321 | "source": [ 322 | "for _ in range(5):\n", 323 | " replay(agent)\n", 324 | " time.sleep(1)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "id": "42e3925d", 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [] 334 | } 335 | ], 336 | "metadata": { 337 | "kernelspec": { 338 | "display_name": "Python 3", 339 | "language": "python", 340 | "name": "python3" 341 | }, 342 | "language_info": { 343 | "codemirror_mode": { 344 | "name": "ipython", 345 | "version": 3 346 | }, 347 | "file_extension": ".py", 348 | "mimetype": "text/x-python", 349 | "name": "python", 350 | "nbconvert_exporter": "python", 351 | "pygments_lexer": "ipython3", 352 | "version": "3.8.8" 353 | } 354 | }, 355 | "nbformat": 4, 356 | "nbformat_minor": 5 357 | } 358 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Reinforcement-Learning 2 | 3 | ### Repository for OpenAI and Unity-ML Reinforcement Learning environments. 4 | 5 | #### Q - Learning 6 | [Taxi-v2 (Solution)](OpenAI/Taxi-v2) 7 | 8 | #### Q - Learning or SARSA 9 | [Taxi-v3 (Solution)](OpenAI/Taxi-v3) (Decay ε Greedy) 10 | 11 | #### Pytorch 12 | [LunarLanderContinuous-v2 (Solution DDPG)](OpenAI/LunarLander-v2) 13 | [MountainCarContinuous-v0 (Solution DDPG)](OpenAI/MountainCarContinuous-v0) 14 | [BipedalWalker-v2 (Solution TD3)](OpenAI/BipedalWalker-v2) solved after 1635 episodes 15 | [BipedalWalker-v3 (Solution TD3)](OpenAI/BipedalWalker-v3) solved after 678 episodes 16 | [HumanoidPyBulletEnv-v0 (Solution PPO)](OpenAI/HumanoidPyBulletEnv-v0) 17 | 18 | #### Tensorflow / Keras 19 | [CartPole-v0 (Solution DDQN, Duelling DQN (incl. Prioritized Replay Buffer (PER))](OpenAI/CartPole-v0) 20 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import copy 4 | from collections import namedtuple, deque 5 | 6 | from Model import Actor, Critic 7 | from Noise import OUNoise 8 | 9 | import torch 10 | import torch.nn.functional as F 11 | import torch.optim as optim 12 | 13 | BUFFER_SIZE = int(1e6) # replay buffer size 14 | BATCH_SIZE = 1024 # minibatch size 15 | GAMMA = 0.99 # discount factor 16 | TAU = 1e-3 # for soft update of target parameters 17 | LR_ACTOR = 1e-4 # learning rate of the actor 18 | LR_CRITIC = 1e-3 # learning rate of the critic 19 | WEIGHT_DECAY = 0 # L2 weight decay 20 | EPSILON_MAX = 1.0 21 | EPSILON_MIN = 0.1 22 | EPSILON_DECAY = 0.995 23 | LEARN_START = 0 24 | UPDATE_EVERY = 1 25 | UPDATES_PER_STEP = 1 26 | 27 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 28 | 29 | class Agent(): 30 | """Interacts with and learns from the environment.""" 31 | 32 | def __init__(self, state_size, action_size, num_agents, random_seed): 33 | """Initialize an Agent object. 
34 | 35 | Params 36 | ====== 37 | state_size (int): dimension of each state 38 | action_size (int): dimension of each action 39 | num_agents (int): number of agents 40 | random_seed (int): random seed 41 | """ 42 | self.state_size = state_size 43 | self.action_size = action_size 44 | self.num_agents = num_agents 45 | self.seed = random.seed(random_seed) 46 | self.epsilon = EPSILON_MAX 47 | 48 | # Actor Network (w/ Target Network) 49 | self.actor_local = Actor(state_size, action_size, random_seed).to(device) 50 | self.actor_target = Actor(state_size, action_size, random_seed).to(device) 51 | self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR) 52 | 53 | # Critic Network (w/ Target Network) 54 | self.critic_local = Critic(state_size, action_size, random_seed).to(device) 55 | self.critic_target = Critic(state_size, action_size, random_seed).to(device) 56 | self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY) 57 | 58 | # Noise process 59 | self.noise = OUNoise(action_size, random_seed, mu=0, theta=0.15, sigma=0.2) 60 | 61 | # Noise process 62 | self.noise = [OUNoise(action_size, random_seed) for i in range(self.num_agents)] 63 | 64 | # Replay memory 65 | self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed) 66 | 67 | # Make sure target is with the same weight as the source 68 | self.hard_update(self.actor_target, self.actor_local) 69 | self.hard_update(self.critic_target, self.critic_local) 70 | 71 | self.t_step = 0 72 | 73 | def step(self, state, action, reward, next_state, done): 74 | """Save experience in replay memory, and use random sample from buffer to learn.""" 75 | # Save experience / reward 76 | self.memory.add(state, action, reward, next_state, done, self.num_agents) 77 | 78 | if len(self.memory) > LEARN_START: 79 | # Learn every UPDATE_EVERY time steps. 80 | self.t_step = (self.t_step + 1) % UPDATE_EVERY 81 | if self.t_step == 0: 82 | # Learn, if enough samples are available in memory 83 | if len(self.memory) > BATCH_SIZE: 84 | for _ in range(UPDATES_PER_STEP): 85 | experiences = self.memory.sample() 86 | self.learn(experiences, GAMMA) 87 | 88 | def act(self, state, add_noise=True): 89 | """Returns actions for given state as per current policy.""" 90 | #state = torch.from_numpy(state).float().unsqueeze(0).to(device) 91 | state = torch.from_numpy(state).float().to(device) 92 | 93 | self.actor_local.eval() 94 | with torch.no_grad(): 95 | action = self.actor_local(state).cpu().data.numpy() 96 | 97 | self.actor_local.train() 98 | 99 | if add_noise: 100 | for i in range(self.num_agents): 101 | agent_action = action[i] 102 | for j in agent_action: 103 | j += self.epsilon * self.noise[i].sample() 104 | 105 | actions = [] 106 | for i in range(len(action)): 107 | actions.append(np.argmax(action[i])) 108 | 109 | #print(action) 110 | return actions 111 | 112 | def reset(self): 113 | for i in range(self.num_agents): 114 | self.noise[i].reset() 115 | 116 | def learn(self, experiences, gamma): 117 | """Update policy and value parameters using given batch of experience tuples. 118 | Q_targets = r + ? 
* critic_target(next_state, actor_target(next_state)) 119 | where: 120 | actor_target(state) -> action 121 | critic_target(state, action) -> Q-value 122 | 123 | Params 124 | ====== 125 | experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples 126 | gamma (float): discount factor 127 | """ 128 | states, actions, rewards, next_states, dones = experiences 129 | 130 | # ---------------------------- update critic ---------------------------- # 131 | # Get predicted next-state actions and Q values from target models 132 | actions_next = self.actor_target(next_states) 133 | Q_targets_next = self.critic_target(next_states, actions_next) 134 | 135 | # Compute Q targets for current states (y_i) 136 | Q_targets = rewards + (gamma * Q_targets_next * (1 - dones)) 137 | 138 | # Compute critic loss 139 | Q_expected = self.critic_local(states, actions) 140 | critic_loss = F.mse_loss(Q_expected, Q_targets) 141 | 142 | # Minimize the loss 143 | self.critic_optimizer.zero_grad() 144 | critic_loss.backward() 145 | torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1) 146 | self.critic_optimizer.step() 147 | 148 | # ---------------------------- update actor ---------------------------- # 149 | # Compute actor loss 150 | actions_pred = self.actor_local(states) 151 | actor_loss = -self.critic_local(states, actions_pred).mean() 152 | 153 | # Minimize the loss 154 | self.actor_optimizer.zero_grad() 155 | actor_loss.backward() 156 | self.actor_optimizer.step() 157 | 158 | # ----------------------- update target networks ----------------------- # 159 | self.soft_update(self.critic_local, self.critic_target, TAU) 160 | self.soft_update(self.actor_local, self.actor_target, TAU) 161 | 162 | # ---------------------------- update noise ---------------------------- # 163 | if self.epsilon - EPSILON_DECAY > EPSILON_MIN: 164 | self.epsilon -= EPSILON_DECAY 165 | else: 166 | self.epsilon = EPSILON_MIN 167 | 168 | def soft_update(self, local_model, target_model, tau): 169 | """Soft update model parameters. 170 | ?_target = t*?_local + (1 - t)*?_target 171 | 172 | Params 173 | ====== 174 | local_model: PyTorch model (weights will be copied from) 175 | target_model: PyTorch model (weights will be copied to) 176 | tau (float): interpolation parameter 177 | """ 178 | for target_param, local_param in zip(target_model.parameters(), local_model.parameters()): 179 | target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data) 180 | 181 | def hard_update(self, target, source): 182 | for target_param, param in zip(target.parameters(), source.parameters()): 183 | target_param.data.copy_(param.data) 184 | 185 | class ReplayBuffer: 186 | """Fixed-size buffer to store experience tuples.""" 187 | 188 | def __init__(self, action_size, buffer_size, batch_size, seed): 189 | """Initialize a ReplayBuffer object. 
190 | Params 191 | ====== 192 | buffer_size (int): maximum size of buffer 193 | batch_size (int): size of each training batch 194 | """ 195 | self.action_size = action_size 196 | self.buffer_size = buffer_size 197 | self.batch_size = batch_size 198 | self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) 199 | self.seed = random.seed(seed) 200 | 201 | self.reset() 202 | 203 | def add(self, state, action, reward, next_state, done, num_agents): 204 | """Add a new experience to memory.""" 205 | for i in range(num_agents): 206 | e = self.experience(state[i], action[i], reward[i], next_state[i], done[i]) 207 | self.memory.append(e) 208 | 209 | def reset(self): 210 | self.memory = deque(maxlen=self.buffer_size) 211 | 212 | def sample(self): 213 | """Randomly sample a batch of experiences from memory.""" 214 | experiences = random.sample(self.memory, k=self.batch_size) 215 | 216 | states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device) 217 | actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(device) 218 | rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device) 219 | next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device) 220 | dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device) 221 | 222 | return states, actions, rewards, next_states, dones 223 | 224 | def __len__(self): 225 | """Return the current size of internal memory.""" 226 | return len(self.memory) -------------------------------------------------------------------------------- /Unity-ML/Soccer/Model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | def hidden_init(layer): 8 | fan_in = layer.weight.data.size()[0] 9 | lim = 1. / np.sqrt(fan_in) 10 | return (-lim, lim) 11 | 12 | 13 | class Actor(nn.Module): 14 | """Actor (Policy) Model.""" 15 | 16 | def __init__(self, state_size, action_size, seed, fc1_units=128, fc2_units=64): 17 | """Initialize parameters and build model. 
18 | Params 19 | ====== 20 | state_size (int): Dimension of each state 21 | action_size (int): Dimension of each action 22 | seed (int): Random seed 23 | fc1_units (int): Number of nodes in first hidden layer 24 | fc2_units (int): Number of nodes in second hidden layer 25 | """ 26 | super(Actor, self).__init__() 27 | self.seed = torch.manual_seed(seed) 28 | 29 | self.fc1 = nn.Linear(state_size, fc1_units) 30 | self.bn1 = nn.BatchNorm1d(fc1_units) 31 | self.fc2 = nn.Linear(fc1_units, fc2_units) 32 | self.bn2 = nn.BatchNorm1d(fc2_units) 33 | self.fc3 = nn.Linear(fc2_units, action_size) 34 | self.bn3 = nn.BatchNorm1d(action_size) 35 | self.softmax = nn.Softmax(dim=1) 36 | self.reset_parameters() 37 | 38 | def reset_parameters(self): 39 | self.fc1.weight.data.uniform_(*hidden_init(self.fc1)) 40 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2)) 41 | self.fc3.weight.data.uniform_(-3e-3, 3e-3) 42 | 43 | def forward(self, state): 44 | """Build an actor (policy) network that maps states -> actions.""" 45 | x = self.fc1(state) 46 | x = F.relu(x) 47 | x = self.bn1(x) 48 | x = self.fc2(x) 49 | x = F.relu(x) 50 | x = self.bn2(x) 51 | x = self.fc3(x) 52 | x = self.bn3(x) 53 | #return torch.tanh(x) 54 | #return self.softmax(x) 55 | 56 | # transform to logits 57 | return F.log_softmax(x) 58 | 59 | 60 | class Critic(nn.Module): 61 | """Critic (Value) Model.""" 62 | 63 | def __init__(self, state_size, action_size, seed, fc1_units=128, fc2_units=64): 64 | """Initialize parameters and build model. 65 | Params 66 | ====== 67 | state_size (int): Dimension of each state 68 | action_size (int): Dimension of each action 69 | seed (int): Random seed 70 | fcs1_units (int): Number of nodes in the first hidden layer 71 | fc2_units (int): Number of nodes in the second hidden layer 72 | fc3_units (int): Number of nodes in the third hidden layer 73 | """ 74 | super(Critic, self).__init__() 75 | self.seed = torch.manual_seed(seed) 76 | 77 | self.bn0 = nn.BatchNorm1d(state_size) 78 | self.fc1 = nn.Linear(state_size, fc1_units) 79 | self.bn1 = nn.BatchNorm1d(fc1_units) 80 | self.fc2 = nn.Linear(fc1_units+action_size, fc2_units) 81 | self.bn2 = nn.BatchNorm1d(fc2_units) 82 | self.fc3 = nn.Linear(fc2_units, 1) 83 | self.reset_parameters() 84 | 85 | def reset_parameters(self): 86 | self.fc1.weight.data.uniform_(*hidden_init(self.fc1)) 87 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2)) 88 | self.fc3.weight.data.uniform_(-3e-3, 3e-3) 89 | 90 | def forward(self, state, action): 91 | """Build a critic (value) network that maps (state, action) pairs -> Q-values.""" 92 | state = self.bn0(state) 93 | xs = self.fc1(state) 94 | xs = self.bn1(xs) 95 | xs = F.leaky_relu(xs) 96 | x = torch.cat((xs, action), dim=1) 97 | x = self.fc2(x) 98 | x = self.bn2(x) 99 | x = F.leaky_relu(x) 100 | return self.fc3(x) 101 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Noise.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import copy 4 | 5 | class OUNoise: 6 | """Ornstein-Uhlenbeck process.""" 7 | 8 | def __init__(self, size, seed, mu=0., theta=0.15, sigma=0.1): 9 | """Initialize parameters and noise process.""" 10 | self.mu = mu * np.ones(size) 11 | self.theta = theta 12 | self.sigma = sigma 13 | self.seed = random.seed(seed) 14 | self.reset() 15 | 16 | def reset(self): 17 | """Reset the internal state (= noise) to mean (mu).""" 18 | self.state = copy.copy(self.mu) 19 | 20 | def sample(self): 21 | 
"""Update internal state and return it as a noise sample.""" 22 | x = self.state 23 | dx = self.theta * (self.mu - x) + self.sigma * np.array([random.random() for i in range(len(x))]) 24 | self.state = x + dx 25 | return self.state 26 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/.DS_Store -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/2.0/Browsers/Compat.browser: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/2.0/settings.map: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 10 | 11 | 20 | 23 | 24 | 25 | 26 | 29 | 30 | 33 | 34 | 43 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/2.0/web.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 64 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 108 | 110 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 150 | 151 | 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/4.0/Browsers/Compat.browser: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/4.0/settings.map: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 10 | 11 | 20 | 23 | 24 | 25 | 26 | 29 | 30 | 33 | 34 | 43 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/4.0/web.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 
32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 79 | 80 | 81 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 94 | 95 | 98 | 99 | 100 | 103 | 104 | 105 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 123 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 197 | 199 | 201 | 202 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 228 | 229 | 230 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 249 | 250 | 251 | 252 | 253 | 254 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/4.5/Browsers/Compat.browser: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/4.5/settings.map: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 10 | 11 | 20 | 23 | 24 | 25 | 26 | 29 | 30 | 33 | 34 | 43 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/4.5/web.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 79 | 80 | 81 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 94 | 95 | 98 | 99 | 100 | 103 | 104 | 105 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 123 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 197 | 199 | 201 | 202 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 228 | 229 | 230 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 249 | 250 | 251 | 252 | 253 | 254 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/browscap.ini: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/browscap.ini -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/MonoBleedingEdge/etc/mono/config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/Resources/unity default resources: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/Resources/unity default resources -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/Resources/unity_builtin_extra: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/Resources/unity_builtin_extra -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/app.info: -------------------------------------------------------------------------------- 1 | Unity Technologies 2 | Unity Environment -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/boot.config: -------------------------------------------------------------------------------- 1 | wait-for-native-debugger=0 2 | scripting-runtime-version=latest 3 | -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/globalgamemanagers: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/globalgamemanagers -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/globalgamemanagers.assets: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/globalgamemanagers.assets -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/level0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/level0 -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/resources.assets: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/resources.assets -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/sharedassets0.assets: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/sharedassets0.assets -------------------------------------------------------------------------------- /Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/sharedassets0.assets.resS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/Soccer_Windows_x86_64/Soccer_Data/sharedassets0.assets.resS -------------------------------------------------------------------------------- /Unity-ML/Soccer/checkpoint_goalie_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/checkpoint_goalie_actor.pth -------------------------------------------------------------------------------- /Unity-ML/Soccer/checkpoint_goalie_critic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/checkpoint_goalie_critic.pth -------------------------------------------------------------------------------- /Unity-ML/Soccer/checkpoint_striker_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/checkpoint_striker_actor.pth -------------------------------------------------------------------------------- /Unity-ML/Soccer/checkpoint_striker_critic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tobiassteidle/Reinforcement-Learning/c3b1edbe4ef9470015041c9794e2198c25eaa4d7/Unity-ML/Soccer/checkpoint_striker_critic.pth --------------------------------------------------------------------------------
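The Soccer checkpoints above were produced by the DDPG-style agent implemented in `Unity-ML/Soccer/Agent.py` (actor and critic networks with target copies, an Ornstein-Uhlenbeck exploration noise process, and soft target updates). As a compact reference for that update rule, the sketch below restates the critic target Q_targets = r + gamma * critic_target(next_state, actor_target(next_state)) and the soft update theta_target = tau*theta_local + (1 - tau)*theta_target. It is an illustrative sketch with simplified names, not the code used to train the checkpoints.

```python
import torch
import torch.nn.functional as F

GAMMA = 0.99  # discount factor (same value as in Agent.py)
TAU = 1e-3    # soft-update interpolation factor (same value as in Agent.py)


def ddpg_update(actor_local, actor_target, critic_local, critic_target,
                actor_optimizer, critic_optimizer, batch):
    """One DDPG learning step on a sampled batch of transitions (illustrative sketch)."""
    states, actions, rewards, next_states, dones = batch

    # Critic: regress Q(s, a) onto the bootstrapped target r + gamma * Q'(s', mu'(s')).
    with torch.no_grad():
        next_actions = actor_target(next_states)
        q_next = critic_target(next_states, next_actions)
        q_targets = rewards + GAMMA * q_next * (1 - dones)
    critic_loss = F.mse_loss(critic_local(states, actions), q_targets)
    critic_optimizer.zero_grad()
    critic_loss.backward()
    critic_optimizer.step()

    # Actor: maximize the critic's value of mu(s) by minimizing its negative.
    actor_loss = -critic_local(states, actor_local(states)).mean()
    actor_optimizer.zero_grad()
    actor_loss.backward()
    actor_optimizer.step()

    # Soft-update the targets: theta_target <- tau*theta_local + (1 - tau)*theta_target.
    for target, local in ((actor_target, actor_local), (critic_target, critic_local)):
        for t_param, l_param in zip(target.parameters(), local.parameters()):
            t_param.data.copy_(TAU * l_param.data + (1.0 - TAU) * t_param.data)
```

In `Agent.py` the same steps additionally clip the critic's gradient norm to 1 and decay the exploration epsilon after each learning step.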