├── .gitignore
├── README.md
├── dqn
    ├── LICENSE
    ├── NeuralQLearner.lua
    ├── NeuralQLearnerB.lua
    ├── Rectifier.lua
    ├── Scale.lua
    ├── TransitionTable.lua
    ├── convnet.lua
    ├── convnet_atari3.lua
    ├── extract_data.lua
    ├── initenv.lua
    ├── msleep.c
    ├── net_downsample_2x_full_y.lua
    ├── nnutils.lua
    ├── plot_results.lua
    ├── test_2agent.lua
    ├── test_agent.lua
    ├── train_2agent.lua
    └── train_agent.lua
├── extract_data
├── install_dependencies.sh
├── plot_2results
├── plot_results
├── plots
    ├── Pong2Player.csv
    ├── Pong2Player0.csv
    ├── Pong2Player025.csv
    ├── Pong2Player025_average_reward.png
    ├── Pong2Player025_episode_counts.png
    ├── Pong2Player025_history_A.csv
    ├── Pong2Player025_history_B.csv
    ├── Pong2Player025_meanq.png
    ├── Pong2Player025_reward_counts.png
    ├── Pong2Player025_tderror.png
    ├── Pong2Player025_time.png
    ├── Pong2Player025p.csv
    ├── Pong2Player025p_average_reward.png
    ├── Pong2Player025p_episode_counts.png
    ├── Pong2Player025p_history_A.csv
    ├── Pong2Player025p_history_B.csv
    ├── Pong2Player025p_meanq.png
    ├── Pong2Player025p_reward_counts.png
    ├── Pong2Player025p_tderror.png
    ├── Pong2Player025p_time.png
    ├── Pong2Player05.csv
    ├── Pong2Player05_average_reward.png
    ├── Pong2Player05_episode_counts.png
    ├── Pong2Player05_history_A.csv
    ├── Pong2Player05_history_B.csv
    ├── Pong2Player05_meanq.png
    ├── Pong2Player05_reward_counts.png
    ├── Pong2Player05_tderror.png
    ├── Pong2Player05_time.png
    ├── Pong2Player05p.csv
    ├── Pong2Player05p_average_reward.png
    ├── Pong2Player05p_episode_counts.png
    ├── Pong2Player05p_history_A.csv
    ├── Pong2Player05p_history_B.csv
    ├── Pong2Player05p_meanq.png
    ├── Pong2Player05p_reward_counts.png
    ├── Pong2Player05p_tderror.png
    ├── Pong2Player05p_time.png
    ├── Pong2Player075.csv
    ├── Pong2Player075_average_reward.png
    ├── Pong2Player075_episode_counts.png
    ├── Pong2Player075_history_A.csv
    ├── Pong2Player075_history_B.csv
    ├── Pong2Player075_meanq.png
    ├── Pong2Player075_reward_counts.png
    ├── Pong2Player075_tderror.png
    ├── Pong2Player075_time.png
    ├── Pong2Player075p.csv
    ├── Pong2Player075p_average_reward.png
    ├── Pong2Player075p_episode_counts.png
    ├── Pong2Player075p_history_A.csv
    ├── Pong2Player075p_history_B.csv
    ├── Pong2Player075p_meanq.png
    ├── Pong2Player075p_reward_counts.png
    ├── Pong2Player075p_tderror.png
    ├── Pong2Player075p_time.png
    ├── Pong2Player0_average_reward.png
    ├── Pong2Player0_episode_counts.png
    ├── Pong2Player0_history_A.csv
    ├── Pong2Player0_history_B.csv
    ├── Pong2Player0_meanq.png
    ├── Pong2Player0_reward_counts.png
    ├── Pong2Player0_tderror.png
    ├── Pong2Player0_time.png
    ├── Pong2PlayerVS.csv
    ├── Pong2PlayerVS_average_reward.png
    ├── Pong2PlayerVS_episode_counts.png
    ├── Pong2PlayerVS_history_A.csv
    ├── Pong2PlayerVS_history_B.csv
    ├── Pong2PlayerVS_meanq.png
    ├── Pong2PlayerVS_reward_counts.png
    ├── Pong2PlayerVS_tderror.png
    ├── Pong2PlayerVS_time.png
    ├── Pong2Player_average_reward.png
    ├── Pong2Player_episode_counts.png
    ├── Pong2Player_history_A.csv
    ├── Pong2Player_history_B.csv
    ├── Pong2Player_meanq.png
    ├── Pong2Player_reward_counts.png
    ├── Pong2Player_tderror.png
    ├── Pong2Player_time.png
    ├── plot.py
    ├── plot_history.py
    ├── scatter.py
    ├── serving_time_history.png
    ├── serving_time_history_competitive.png
    ├── serving_time_history_cooperative.png
    ├── serving_time_per_point.png
    ├── serving_time_per_point_scatter.png
    ├── sidebounces_history.png
    ├── sidebounces_history_competitive.png
    ├── sidebounces_history_cooperative.png
    ├── sidebounces_per_point.png
    ├── sidebounces_per_point_scatter.png
    ├── stats.tex
    ├── table.py
    ├── wallbounces_history.png
    ├── wallbounces_history_competitive.png
    ├── wallbounces_history_cooperative.png
    ├── wallbounces_per_sidebounce.png
    └── wallbounces_per_sidebounce_scatter.png
├── roms
    ├── Pong2Player.bin
    ├── Pong2Player0.bin
    ├── Pong2Player025.bin
    ├── Pong2Player025p.bin
    ├── Pong2Player05.bin
    ├── Pong2Player05p.bin
    ├── Pong2Player075.bin
    ├── Pong2Player075p.bin
    ├── Pong2PlayerVS.bin
    ├── README
    ├── breakout.bin
    ├── breakout_2player.bin
    ├── pong.bin
    ├── surround.bin
    ├── wizard_of_wor.bin
    └── wizard_of_wor_2player.bin
├── run_cpu
├── run_gpu
├── run_gpu2
├── run_gpu2_resume
├── test_cpu
├── test_gpu
├── test_gpu2
├── test_gpu2_seeds
├── test_gpu2_versions
└── test_schemes


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | torch
3 | dqn/*.t7
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # DeepMind Atari Deep Q Learner for 2 players
  2 | 
  3 | This repository hosts the code to reproduce the experiments in the article "Multiagent Cooperation and Competition with Deep
  4 | Reinforcement Learning". It is based on DeepMind's [original code](https://sites.google.com/a/deepmind.com/dqn/), which was modified to support two players. **NB!** Currently only the Pong game in two-player mode is supported; support for other games and one-player mode is untested.
  5 | 
  6 | Gameplay videos can be found here: https://www.youtube.com/playlist?list=PLfLv_F3r0TwyaZPe50OOUx8tRf0HwdR_u
  7 | 
  8 | Installation instructions
  9 | -------------------------
 10 | 
 11 | The installation requires Linux with apt-get.
 12 | 
 13 | Note: In order to run the GPU version of DQN, you should additionally have the
 14 | NVIDIA® CUDA® (version 5.5 or later) toolkit installed prior to the Torch
 15 | installation below.
 16 | This can be downloaded from https://developer.nvidia.com/cuda-toolkit
 17 | and installation instructions can be found in
 18 | http://docs.nvidia.com/cuda/cuda-getting-started-guide-for-linux
 19 | 
 20 | To train DQN on Atari games, the following components must be installed:
 21 | * LuaJIT and Torch 7.0
 22 | * nngraph
 23 | * Xitari (fork of the Arcade Learning Environment (Bellemare et al., 2013))
 24 | * AleWrap (a lua interface to Xitari)
 25 | 
 26 | To install all of the above in a subdirectory called 'torch', it should be enough to run
 27 | 
 28 |     ./install_dependencies.sh
 29 | 
 30 | from the base directory of the package.
 31 | 
 32 | 
 33 | Note: The above install script will install the following packages via apt-get:
 34 | build-essential, gcc, g++, cmake, curl, libreadline-dev, git-core, libjpeg-dev,
 35 | libpng-dev, ncurses-dev, imagemagick, unzip, libqt4-dev.
 36 | 
 37 | In addition, the following Lua components are installed into the 'torch' subdirectory: 
 38 | luajit-rocks, cwrap, paths, torch, nn, cutorch, cunn, luafilesystem, penlight, sys, 
 39 | xlua, image, env, qtlua, qttorch, nngraph, lua-gd. 
 40 | 
 41 | Training
 42 | --------
 43 | 
 44 | To run training for a game:
 45 | 
 46 |     ./run_gpu2 <game name>
 47 | 
 48 | The following games are supported:
 49 |  * `Pong2Player` - cooperative game (\rho = -1)
 50 |  * `Pong2Player075` - transition (\rho = -0.75)
 51 |  * `Pong2Player05` - transition (\rho = -0.5)
 52 |  * `Pong2Player025` - transition (\rho = -0.25)
 53 |  * `Pong2Player0` - transition (\rho = 0)
 54 |  * `Pong2Player025p` - transition (\rho = 0.25)
 55 |  * `Pong2Player05p` - transition (\rho = 0.5)
 56 |  * `Pong2Player075p` - transition (\rho = 0.75)
 57 |  * `Pong2PlayerVS` - competitive game (\rho = 1)
 58 | 
 59 | During training, snapshots of both agents' networks are written to the `dqn/` folder. They are named `DQN3_0_1_<game name>_FULL_Y_A_<epoch>.t7` and `DQN3_0_1_<game name>_FULL_Y_B_<epoch>.t7`. One epoch is defined as 250,000 steps, and epochs are numbered starting from 0. **NB!** Each epoch snapshot takes about 1GB, so reserve 50GB of free space for 50 epochs.
 60 | 
 61 | Testing
 62 | -------
 63 | 
 64 | To run testing for one episode:
 65 | 
 66 |     ./test_gpu2 <game name> <epoch>
 67 |     
 68 | To run testing with different seeds (by default 10):
 69 | 
 70 |     ./test_gpu2_seeds <game name> <epoch>
 71 | 
 72 | To run testing with different seeds (by default 10), for all epochs (default 49):
 73 | 
 74 |     ./test_gpu2_versions <game name>
 75 |     
 76 | To run all experiments at once:
 77 | 
 78 |     ./test_schemes
 79 |     
 80 | All these scripts write the file `dqn/<game name>.csv`, which contains the following game statistics:
 81 |  * *Epoch* - epoch number,
 82 |  * *Seed* - seed used for this run,
 83 |  * *WallBounces* - total number of wall-bounces in this run,
 84 |  * *SideBounce* - total number of paddle-bounces in this run,
 85 |  * *Points* - total number of points (lost balls) in this run,
 86 |  * *ServingTime* - total serving time in this run,
 87 |  * *RewardA* - total reward of player A,
 88 |  * *RewardB* - total reward of player B.
 89 | 
 90 | **NB!** All scripts append to this file, so after several runs you might want to delete irrelevant lines.
 91 | 
 92 | Extracting training statistics
 93 | ----------------------------
 94 | 
 95 | To plot training history:
 96 | 
 97 |     ./plot_2results <game name> [<epoch>]
 98 |     
 99 | The following plots are shown for both agents:
100 |  * average reward per game during testing,
101 |  * total count of non-zero rewards during testing,
102 |  * number of games played during testing,
103 |  * average Q-value of validation set.
104 | 
105 | To extract training statistics to file:
106 | 
107 |     ./extract_data <game name> <epoch>
108 | 
109 | This produces the files `dqn/<game name>_history_A.csv` and `dqn/<game name>_history_B.csv`. These files contain the following columns:
110 |  * *Epoch* - testing phase number, divide by 2 to get true epoch,
111 |  * *Average reward* - average reward per game during testing,
112 |  * *Reward count* - total count of non-zero rewards during testing,
113 |  * *Episode count* - number of games played during testing,
114 |  * *MeanQ* - average Q-value of validation set,
115 |  * *TD Error* - temporal difference error,
116 |  * *Seconds* - seconds since start.
117 | 
118 | Plotting game statistics
119 | ------------------------
120 | 
121 | Plotting scripts are in folder `plots`. All `.csv` files from `dqn/` folder should be moved there for plotting. 
122 | 
123 |  * `scatter.py` - plots for figure 7, uses `<game name>.csv` files,
124 |  * `plot.py` - plots for figures 3 and 4, uses `Pong2Player.csv` and `Pong2PlayerVS.csv` files,
125 |  * `plot_history.py` - plots for figure 8, uses `<game name>_history_A.csv` and `<game name>_history_B.csv` files.
126 | 
127 | **NB!** Be sure to clean up `<game name>.csv` files as explained above.
128 | 


--------------------------------------------------------------------------------
/dqn/LICENSE:
--------------------------------------------------------------------------------
 1 | 
 2 | LIMITED LICENSE:
 3 | 
 4 | Copyright (c) 2014 Google Inc.
 5 | Limited License: Under no circumstance is commercial use, reproduction, or
 6 | distribution permitted. Use, reproduction, and distribution are permitted
 7 | solely for academic use in evaluating and reviewing claims made in
 8 | "Human-level control through deep reinforcement learning", Nature 518, 529–533
 9 | (26 February 2015) doi:10.1038/nature14236, provided that the following
10 | conditions are met:
11 | 
12 | * Any reproduction or distribution of source code must retain the above
13 | copyright notice and the full text of this license including the following
14 | disclaimer. 
15 | 
16 | * Any reproduction or distribution in binary form must reproduce the above
17 | copyright notice and the full text of this license including the following
18 | disclaimer  in the documentation and/or other materials provided with the
19 | distribution.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/dqn/Rectifier.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 | Copyright (c) 2014 Google Inc.
 3 | 
 4 | See LICENSE file for full terms of limited license.
 5 | ]]
 6 | 
 7 | --[[ Rectified Linear Unit.
 8 | 
 9 | The output is max(0, input), computed element-wise; the class is registered as nn.Rectifier.
10 | --]]
11 | 
12 | local Rectifier, parent = torch.class('nn.Rectifier', 'nn.Module')
13 | -- Forward pass (accepts minibatches): max(0, x) evaluated element-wise as (|x| + x) / 2.
14 | function Rectifier:updateOutput(input)
15 |     local out = self.output:resizeAs(input):copy(input)
16 |     return out:abs():add(input):div(2)
17 | end
18 | -- Backward pass: gradInput = sign(output) .* gradOutput, reusing self.gradInput as storage.
19 | function Rectifier:updateGradInput(input, gradOutput)
20 |     local grad = self.gradInput:resizeAs(self.output)
21 |     return grad:sign(self.output):cmul(gradOutput)
22 | end


--------------------------------------------------------------------------------
/dqn/Scale.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 | Copyright (c) 2014 Google Inc.
 3 | 
 4 | See LICENSE file for full terms of limited license.
 5 | ]]
 6 | 
 7 | require "nn"
 8 | require "image"
 9 | -- Frame preprocessing module, registered with torch as nn.Scale (parent class nn.Module).
10 | local scale = torch.class('nn.Scale', 'nn.Module')
11 | 
12 | -- Constructor: records the target size that forward() rescales frames to.
13 | -- Only (height, width) are read; any further arguments are ignored.
14 | function scale:__init(height, width)
15 |     self.height, self.width = height, width
16 | end
17 | -- Convert a frame to luminance and rescale it to self.width x self.height.
18 | function scale:forward(x)
19 |     local frame = x
20 |     -- inputs with more than 3 dimensions: only the first slice is kept
21 |     if frame:dim() > 3 then
22 |         frame = frame[1]
23 |     end
24 |     local luminance = image.rgb2y(frame)
25 |     -- bilinear resize; image.scale is given the width before the height
26 |     return image.scale(luminance, self.width, self.height, 'bilinear')
27 | end
28 | -- updateOutput simply delegates to forward(), so both entry points give the same result.
29 | function scale:updateOutput(input)
30 |     return self:forward(input)
31 | end
32 | -- Deliberate no-op override of float(); note it returns nil rather than the module itself.
33 | function scale:float()
34 | end
35 | 


--------------------------------------------------------------------------------
/dqn/convnet.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 | Copyright (c) 2014 Google Inc.
 3 | 
 4 | See LICENSE file for full terms of limited license.
 5 | ]]
 6 | 
 7 | require "initenv"
 8 | -- Builds the Q-network described by args: reshape -> conv stack -> flatten -> hidden layers -> one output per action.
 9 | function create_network(args)
10 |     local net = nn.Sequential()
11 |     net:add(nn.Reshape(unpack(args.input_dims)))
12 | 
13 |     --- first convolutional layer: hist_len*ncols input planes, square filters and strides
14 |     local convLayer = nn.SpatialConvolution
15 | 
16 |     net:add(convLayer(args.hist_len*args.ncols, args.n_units[1],
17 |                         args.filter_size[1], args.filter_size[1],
18 |                         args.filter_stride[1], args.filter_stride[1],1))  -- trailing 1: presumably padW (padding) -- confirm against nn.SpatialConvolution
19 |     net:add(args.nl())
20 | 
21 |     -- Add the remaining convolutional layers, each followed by the non-linearity args.nl
22 |     for i=1,(#args.n_units-1) do
23 |         -- convolutional layer i+1 (n_units[i] input planes, n_units[i+1] output planes)
24 |         net:add(convLayer(args.n_units[i], args.n_units[i+1],
25 |                             args.filter_size[i+1], args.filter_size[i+1],
26 |                             args.filter_stride[i+1], args.filter_stride[i+1]))
27 |         net:add(args.nl())
28 |     end
29 |     -- push one all-zero example through the layers added so far to measure their output size
30 |     local nel
31 |     if args.gpu >= 0 then
32 |         nel = net:cuda():forward(torch.zeros(1,unpack(args.input_dims))
33 |                 :cuda()):nElement()
34 |     else
35 |         nel = net:forward(torch.zeros(1,unpack(args.input_dims))):nElement()
36 |     end
37 | 
38 |     -- reshape all feature planes into a vector per example
39 |     net:add(nn.Reshape(nel))
40 | 
41 |     -- first fully connected layer (nel inputs, n_hid[1] units)
42 |     net:add(nn.Linear(nel, args.n_hid[1]))
43 |     net:add(args.nl())
44 |     local last_layer_size = args.n_hid[1]
45 | 
46 |     for i=1,(#args.n_hid-1) do
47 |         -- add further hidden Linear layers, tracking the width of the most recent one
48 |         last_layer_size = args.n_hid[i+1]
49 |         net:add(nn.Linear(args.n_hid[i], last_layer_size))
50 |         net:add(args.nl())
51 |     end
52 | 
53 |     -- add the last fully connected layer (to actions); no non-linearity follows it
54 |     net:add(nn.Linear(last_layer_size, args.n_actions))
55 | 
56 |     if args.gpu >=0 then
57 |         net:cuda()  -- convert again so the layers added after the size probe are converted too
58 |     end
59 |     if args.verbose >= 2 then
60 |         --print(net)
61 |         print('Convolutional layers flattened output size:', nel)
62 |     end
63 |     return net
64 | end
65 | 


--------------------------------------------------------------------------------
/dqn/convnet_atari3.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 | Copyright (c) 2014 Google Inc.
 3 | 
 4 | See LICENSE file for full terms of limited license.
 5 | ]]
 6 | 
 7 | require 'convnet'
 8 | -- Returns a builder that writes the architecture settings into args (mutating it) and calls create_network.
 9 | return function(args)
10 |     args.n_units        = {32, 64, 64}  -- output planes of the three convolutional layers
11 |     args.filter_size    = {8, 4, 3}     -- square filter size per convolutional layer
12 |     args.filter_stride  = {4, 2, 1}     -- stride per convolutional layer
13 |     args.n_hid          = {512}         -- a single hidden fully connected layer
14 |     args.nl             = nn.Rectifier  -- non-linearity constructor applied after each layer
15 | 
16 |     return create_network(args)
17 | end
18 | 
19 | 


--------------------------------------------------------------------------------
/dqn/extract_data.lua:
--------------------------------------------------------------------------------
 1 | require 'nn'
 2 | require 'initenv'
 3 | require 'cutorch'
 4 | -- Dump the training history stored in a saved DQN file as CSV on stdout.
 5 | if #arg < 1 then
 6 |   print('Usage: ', arg[0], ' <DQN file>')
 7 |   return
 8 | end
 9 | 
10 | data = torch.load(arg[1])
11 | print("Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds")
12 | for epoch = 1, #data.v_history do
13 |   local row = {epoch, data.reward_history[epoch], data.reward_counts[epoch], data.episode_counts[epoch],
14 |                data.v_history[epoch], data.td_history[epoch], data.time_history[epoch]}
15 |   print(table.concat(row, ','))
16 | end
17 | 


--------------------------------------------------------------------------------
/dqn/msleep.c:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * gcc -shared -fPIC -o msleep.so -I../torch/include -L../torch/lib msleep.c
 3 |  * -I and -L may vary on your computer.
 4 |  * Your computer may use something besides -fPIC
 5 | */
 6 | 
 7 | #include <unistd.h>
 8 | #include <lua.h>
 9 | #include <lauxlib.h>
10 | #include <lualib.h>
11 | /* Lua msleep(ms). usleep() is only specified below 1s, so whole seconds go through sleep(); ms <= 0 returns at once. */
12 | static int msleep_c(lua_State *L){
13 | 	long msecs = lua_tointeger(L, -1);
14 | 	if (msecs > 0 && sleep(msecs / 1000) == 0) usleep(1000 * (msecs % 1000));  /* remainder is skipped if sleep() was interrupted */
15 | 	return 0;                  /* No items returned */
16 | }
17 | /* Lua sleep(secs): blocks for the number of seconds given as the integer on top of the stack. */
18 | /* The C function can't be named sleep(): that would clash with sleep() in unistd.h. */
19 | static int sleep_c(lua_State *L){
20 | 	/* narrowed to long first, then handed to sleep() unchanged */
21 | 	sleep((long)lua_tointeger(L, -1));
22 | 	return 0;                  /* nothing is left on the Lua stack */
23 | }
24 | /* Module entry point; presumably invoked by require "msleep" via Lua's luaopen_<name> convention. */
25 | /* Register both functions as the Lua globals "msleep" and "sleep"; no values are returned to Lua. */
26 | int luaopen_msleep(lua_State *L){
27 | 	lua_register( L, "msleep", msleep_c);  
28 | 	lua_register(L, "sleep", sleep_c);
29 | 	return 0;
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/dqn/net_downsample_2x_full_y.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 | Copyright (c) 2014 Google Inc.
 3 | 
 4 | See LICENSE file for full terms of limited license.
 5 | ]]
 6 | 
 7 | require "image"
 8 | require "Scale"
 9 | -- Preprocessing "network": a single nn.Scale module configured for 84x84 output.
10 | local function create_network(args)
11 |     -- Y (luminance); args is not used here
12 |     return nn.Scale(84, 84, true)  -- NOTE(review): scale:__init takes (height, width) only, so the trailing `true` looks unused
13 | end
14 | 
15 | return create_network
16 | 


--------------------------------------------------------------------------------
/dqn/nnutils.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 | Copyright (c) 2014 Google Inc.
 3 | 
 4 | See LICENSE file for full terms of limited license.
 5 | ]]
 6 | 
 7 | require "torch"
 8 | -- Walk a module tree and build "<type>: <func(module[field])>[<children>]" for every module with the field or children.
 9 | function recursive_map(module, field, func)
10 |     local value, children = module[field], module.modules
11 |     if not value and not children then
12 |         return ""  -- modules with neither the field nor submodules contribute nothing
13 |     end
14 | 
15 |     local out = torch.typename(module) .. ": "
16 |     if value then
17 |         out = out .. func(value)
18 |     end
19 |     if children then
20 |         out = out .. "["
21 |         for idx, child in ipairs(children) do
22 |             local piece = recursive_map(child, field, func)
23 |             -- a space follows every non-empty entry except the last child
24 |             local sep = (idx < #children and #piece > 0) and " " or ""
25 |             out = out .. piece .. sep
26 |         end
27 |         out = out .. "]"
28 |     end
29 |     return out
30 | end
31 | -- Mean of |w|, computed on a float copy so that w itself is left untouched.
32 | function abs_mean(w)
33 |     return w:clone():float():abs():mean()
34 | end
35 | -- Largest |w|, computed on a float copy so that w itself is left untouched.
36 | function abs_max(w)
37 |     return w:clone():float():abs():max()
38 | end
39 | -- Report the mean and the maximum absolute weight of every module in the
40 | -- given network as a printable string.
41 | function get_weight_norms(module)
42 |     local means = recursive_map(module, "weight", abs_mean)
43 |     local maxima = recursive_map(module, "weight", abs_max)
44 |     return "\n\nWeight norms:\n" .. means .. "\nWeight max:\n" .. maxima
45 | end
46 | -- Report the mean and the maximum absolute weight gradient of every module
47 | -- in the given network as a printable string.
48 | function get_grad_norms(module)
49 |     local means = recursive_map(module, "gradWeight", abs_mean)
50 |     local maxima = recursive_map(module, "gradWeight", abs_max)
51 |     return "Weight grad norms:\n" .. means ..
52 |         "\nWeight grad max:\n" .. maxima
53 | end
54 | 


--------------------------------------------------------------------------------
/dqn/plot_results.lua:
--------------------------------------------------------------------------------
 1 | require 'nn'
 2 | require 'initenv'
 3 | require 'cutorch'
 4 | require 'gnuplot'
 5 | -- Plot the training history stored in a saved DQN file, one gnuplot figure per metric.
 6 | if #arg < 1 then
 7 |   print('Usage: ', arg[0], ' <DQN file>')
 8 |   return
 9 | end
10 | 
11 | -- an optional second argument is appended to every plot title
12 | if #arg == 2 then
13 |   agent = ' for agent' .. arg[2]
14 | else
15 |   agent = ' '
16 | end
17 | data = torch.load(arg[1])
18 | 
19 | --gnuplot.raw('set multiplot layout 2, 3')
20 | 
21 | -- {history field, plot title} pairs, one figure each, drawn in this order
22 | local figures = {
23 |   {'reward_history', 'Average reward per game during testing'},
24 |   {'reward_counts', 'Total count of rewards during testing'},
25 |   {'episode_counts', 'Number of games played during testing'},
26 |   {'v_history', 'Average Q-value of validation set'},
27 |   -- disabled plots, kept for reference:
28 |   --{'td_history', 'TD error (old and new Q-value difference) of validation set'},
29 |   --{'time_history', 'Seconds elapsed after epoch'},
30 |   --{'qmax_history', 'Qmax history'},  -- originally titled without the agent suffix
31 | }
32 | 
33 | for _, figure in ipairs(figures) do
34 |   local field, title = figure[1], figure[2]
35 |   gnuplot.figure()
36 |   gnuplot.title(title .. agent)
37 |   gnuplot.plot(torch.Tensor(data[field]))
38 | end
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/dqn/test_2agent.lua:
--------------------------------------------------------------------------------
  1 | --[[
  2 | Copyright (c) 2014 Google Inc.
  3 | 
  4 | See LICENSE file for full terms of limited license.
  5 | ]]
  6 | 
  7 | gd = require "gd"
  8 | require "math"
  9 | if not dqn then
 10 |     require "initenv"
 11 | end
 12 | -- Command-line options of the two-agent test script (parsed with torch.CmdLine).
 13 | local cmd = torch.CmdLine()
 14 | cmd:text()
 15 | cmd:text('TrainAgent in Environment:')
 16 | cmd:text()
 17 | cmd:text('Options:')
 18 | -- environment options
 19 | cmd:option('-framework', '', 'name of training framework')
 20 | cmd:option('-env', '', 'name of environment to use')
 21 | cmd:option('-game_path', '', 'path to environment file (ROM)')
 22 | cmd:option('-env_params', '', 'string of environment parameters')
 23 | cmd:option('-pool_frms', '',
 24 |            'string of frame pooling parameters (e.g.: size=2,type="max")')
 25 | cmd:option('-actrep', 1, 'how many times to repeat action')
 26 | cmd:option('-random_starts', 0, 'play action 0 between 1 and random_starts ' ..
 27 |            'number of times at the start of each training episode')
 28 | -- agent options; the *B variants configure the second agent
 29 | cmd:option('-name', '', 'filename used for saving network and training history for agent 1')
 30 | cmd:option('-nameB', '', 'filename used for saving network and training history for agent 2')
 31 | cmd:option('-network', '', 'reload pretrained network for agent 1')
 32 | cmd:option('-networkB', '', 'reload pretrained network for agent 2')
 33 | cmd:option('-agent', '', 'name of agent file to use')
 34 | cmd:option('-agent_params', '', 'string of agent parameters')
 35 | cmd:option('-seed', 3, 'fixed input seed for repeatable experiments')
 36 | -- runtime and output options
 37 | cmd:option('-verbose', 2,
 38 |            'the higher the level, the more information is printed to screen')
 39 | cmd:option('-threads', 1, 'number of BLAS threads')
 40 | cmd:option('-gpu', -1, 'gpu flag')
 41 | cmd:option('-gif_file', '', 'GIF path to write session screens')
 42 | cmd:option('-csv_file', '', 'CSV path to write session data')
 43 | cmd:option('-version', '', 'epoch of training')
 44 | cmd:option('-datas_file', '', 'CSV path to write learning evaluation data')
 45 | cmd:text()
 46 | 
 47 | local opt = cmd:parse(arg)
 48 | local clock = os.clock  -- NOTE(review): not referenced in the visible part of this script
 49 | --- General setup: environment, both action sets, both agents and their options.
 50 | local game_env, game_actions,game_actionsB, agent,agentB, opt,optB = setup2(opt)  -- re-declares opt with the value returned by setup2
 51 | 
 52 | -- override print to always flush the output
 53 | local old_print = print
 54 | local print = function(...)
 55 |     old_print(...)
 56 |     io.flush()
 57 | end
 58 | 
 59 | local version=opt.version  -- written to the first column of the statistics file
 60 | -- file names from command line
 61 | local gif_filename = opt.gif_file
 62 | local csv_filename = opt.csv_file
 63 | local datas_filename=opt.datas_file
 64 | print(gif_filename, csv_filename, datas_filename)
 65 | -- Start the game and turn its first screen into the opening GIF frame and the display window.
 66 | -- start a new game
 67 | local screen, rewardA,rewardB, terminal = game_env:newGame2()
 68 | 
 69 | -- compress screen to JPEG with 100% quality
 70 | local jpg = image.compressJPG(screen:squeeze(), 100)
 71 | -- create gd image from JPEG string
 72 | local im = gd.createFromJpegStr(jpg:storage():string())
 73 | -- convert truecolor to palette
 74 | im:trueColorToPalette(false, 256)
 75 | 
 76 | -- write GIF header, use global palette and infinite looping
 77 | im:gifAnimBegin(gif_filename, true, 0)
 78 | -- write first frame (delay argument 7; the frames written in the main loop use 6)
 79 | im:gifAnimAdd(gif_filename, false, 0, 0, 7, gd.DISPOSAL_NONE)
 80 | 
 81 | -- remember the image and show it first
 82 | local previm = im
 83 | local win = image.display({image=screen})
 84 | -- The per-step CSV is truncated ("w"); the statistics file is opened in append mode ("a+").
 85 | -- open CSV file for writing and write header
 86 | local csv_file = assert(io.open(csv_filename, "w"))
 87 | csv_file:write('actionA;ActionB;max_qvalueA;max_qvalueB;rewardA;rewardB;terminal\n')
 88 | local datas_file = assert(io.open(datas_filename, "a+"))
 89 | if opt.seed==1 then datas_file:write('training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB\n') end  -- header only for seed 1 (the option default is 3)
 90 | print("Started playing...")
 91 | previousScore=0  -- NOTE(review): never updated below, so the final print always shows 0
 92 | totalSideBounce=0
 93 | previousWallBounce=false
 94 | totalWallBounce=0
 95 | previousSideBounce=0
 96 | servingTime=0
 97 | totalRewardA = 0
 98 | totalRewardB = 0
 99 | -- play one episode (game), collecting bounce, serving-time and reward statistics
100 | while not terminal do
101 |     -- reset bestq first: if the action ends up being chosen randomly, the logged Q-value stays 0
102 |     agent.bestq = 0
103 |     agentB.bestq = 0
104 |     
105 |     -- choose the best action; presumably (true, 0.01) mean testing mode and test epsilon -- confirm in the agent's perceive()
106 |     local action_index = agent:perceive(rewardA, screen, terminal, true, 0.01)
107 |     local action_indexB = agentB:perceive(rewardB, screen, terminal, true, 0.01)
108 |     --if agent.bestq == 0 then
109 |     --  print("A random action: " .. action_index)
110 |     --else
111 |     --  print("A agent action: " .. action_index)
112 |     --end
113 |     -- play game in test mode (episodes don't end when losing a life)
114 |     screen, rewardA,rewardB, terminal, sideBouncing,wallBouncing,points,crash,serving = game_env:step2(game_actions[action_index],game_actionsB[action_indexB], false)
115 |     if crash then
116 |        print("CRASHED!!!")
117 |        break
118 |     end
119 |     if rewardA ~= 0 or rewardB ~= 0 then
120 |        print(rewardA, rewardB, points)
121 |     end
122 |     totalRewardA = totalRewardA + rewardA
123 |     totalRewardB = totalRewardB + rewardB
124 |     -- gather statistics for this run
125 |     -- wallBouncing stays true while the ball touches the wall, so count only the step on which it turns true
126 |     if (wallBouncing==true and previousWallBounce==false) then
127 |         totalWallBounce=totalWallBounce+1 
128 |     end
129 |     previousWallBounce=wallBouncing
130 |     -- a paddle bounce is counted whenever sideBouncing is greater than on the previous step
131 |     if (previousSideBounce<sideBouncing) then
132 |         totalSideBounce=totalSideBounce+1
133 |     end
134 |     previousSideBounce=sideBouncing
135 |     if(serving==true) then 
136 |     	servingTime=servingTime+opt.actrep 
137 |     end
138 |    
139 |     
140 | 
141 |     -- display screen
142 |     image.display({image=screen, win=win})
143 | 
144 |     -- create gd image from tensor
145 |     jpg = image.compressJPG(screen:squeeze(), 100)
146 |     im = gd.createFromJpegStr(jpg:storage():string())
147 |     
148 |     -- use palette from previous (first) image
149 |     im:trueColorToPalette(false, 256)
150 |     im:paletteCopy(previm)
151 | 
152 |     -- write new GIF frame, no local palette, starting from left-top, 0.06s delay
153 |     im:gifAnimAdd(gif_filename, false, 0, 0, 6, gd.DISPOSAL_NONE)
154 |     -- remember previous screen for optimal compression
155 |     previm = im
156 | 
157 |     -- write chosen actions, best Q-values, rewards and terminal flag for this step to the CSV file
158 |     csv_file:write(action_index .. ';' ..action_indexB .. ';' .. agent.bestq .. ';' .. agentB.bestq .. ';' .. rewardA .. ';'.. rewardB .. ';' .. tostring(terminal) .. '\n')
159 |     --print(previousScore.." / "..points.." bounce ",totalSideBounce,":"..totalWallBounce)
160 |     
161 | end
162 | print("final "..previousScore.." / "..points.." bounce ",totalSideBounce,":"..totalWallBounce)  -- NOTE(review): raises if points is nil (loop body never assigned it), before any file is closed
163 | datas_file:write(""..version..";"..opt.seed..";"..totalWallBounce..";"..totalSideBounce..";"..points..";"..servingTime..";"..totalRewardA..";"..totalRewardB.."\n")
164 | -- one summary row per run; columns match the header written for seed 1
165 | datas_file:close()
166 | 
167 | -- end GIF animation and close CSV file
168 | gd.gifAnimEnd(gif_filename)
169 | csv_file:close()
170 | 
171 | print("Finished playing, close window to exit!")
172 | assert(false)  -- always raises, so the script ends with an error here; NOTE(review): looks deliberate -- confirm intent
173 | 
174 | 


--------------------------------------------------------------------------------
/dqn/test_agent.lua:
--------------------------------------------------------------------------------
  1 | --[[
  2 | Copyright (c) 2014 Google Inc.
  3 | 
  4 | See LICENSE file for full terms of limited license.
  5 | ]]
  6 | 
  7 | gd = require "gd"
  8 | 
  9 | if not dqn then
 10 |     require "initenv"
 11 | end
 12 | 
 13 | local cmd = torch.CmdLine()
 14 | cmd:text()
 15 | cmd:text('Train Agent in Environment:')
 16 | cmd:text()
 17 | cmd:text('Options:')
 18 | 
 19 | cmd:option('-framework', '', 'name of training framework')
 20 | cmd:option('-env', '', 'name of environment to use')
 21 | cmd:option('-game_path', '', 'path to environment file (ROM)')
 22 | cmd:option('-env_params', '', 'string of environment parameters')
 23 | cmd:option('-pool_frms', '',
 24 |            'string of frame pooling parameters (e.g.: size=2,type="max")')
 25 | cmd:option('-actrep', 1, 'how many times to repeat action')
 26 | cmd:option('-random_starts', 0, 'play action 0 between 1 and random_starts ' ..
 27 |            'number of times at the start of each training episode')
 28 | 
 29 | cmd:option('-name', '', 'filename used for saving network and training history')
 30 | cmd:option('-network', '', 'reload pretrained network')
 31 | cmd:option('-agent', '', 'name of agent file to use')
 32 | cmd:option('-agent_params', '', 'string of agent parameters')
 33 | cmd:option('-seed', 1, 'fixed input seed for repeatable experiments')
 34 | 
 35 | cmd:option('-verbose', 2,
 36 |            'the higher the level, the more information is printed to screen')
 37 | cmd:option('-threads', 1, 'number of BLAS threads')
 38 | cmd:option('-gpu', -1, 'gpu flag')
 39 | cmd:option('-gif_file', '', 'GIF path to write session screens')
 40 | cmd:option('-csv_file', '', 'CSV path to write session data')
 41 | 
 42 | cmd:text()
 43 | 
 44 | local opt = cmd:parse(arg)
 45 | 
 46 | --- General setup.
 47 | local game_env, game_actions, agent, opt = setup(opt)
 48 | 
 49 | -- override print to always flush the output
 50 | local old_print = print
 51 | local print = function(...)
 52 |     old_print(...)
 53 |     io.flush()
 54 | end
 55 | 
 56 | -- file names from command line
 57 | local gif_filename = opt.gif_file
 58 | local csv_filename = opt.csv_file
 59 | print(gif_filename, csv_filename)
 60 | 
 61 | -- start a new game
 62 | local screen, reward, terminal = game_env:newGame()
 63 | 
 64 | -- compress screen to JPEG with 100% quality
 65 | local jpg = image.compressJPG(screen:squeeze(), 100)
 66 | -- create gd image from JPEG string
 67 | local im = gd.createFromJpegStr(jpg:storage():string())
 68 | -- convert truecolor to palette
 69 | im:trueColorToPalette(false, 256)
 70 | 
 71 | -- write GIF header, use global palette and infinite looping
 72 | im:gifAnimBegin(gif_filename, true, 0)
 73 | -- write first frame
 74 | im:gifAnimAdd(gif_filename, false, 0, 0, 7, gd.DISPOSAL_NONE)
 75 | 
 76 | -- remember the image and show it first
 77 | local previm = im
 78 | local win = image.display({image=screen})
 79 | 
 80 | -- open CSV file for writing and write header
 81 | local csv_file = assert(io.open(csv_filename, "w"))
 82 | csv_file:write('action;max_qvalue;reward;terminal\n')
 83 | 
 84 | print("Started playing...")
 85 | 
 86 | -- play one episode (game)
 87 | while not terminal do
 88 |     -- if action was chosen randomly, Q-value is 0
 89 |     agent.bestq = 0
 90 |     
 91 |     -- choose the best action
 92 |     local action_index = agent:perceive(reward, screen, terminal, true, 0.01)
 93 | 
 94 |     -- play game in test mode (episodes don't end when losing a life)
 95 |     screen, reward, terminal = game_env:step(game_actions[action_index], false)
 96 | 
 97 |     -- display screen
 98 |     image.display({image=screen, win=win})
 99 | 
100 |     -- create gd image from tensor
101 |     jpg = image.compressJPG(screen:squeeze(), 100)
102 |     im = gd.createFromJpegStr(jpg:storage():string())
103 |     
104 |     -- use palette from previous (first) image
105 |     im:trueColorToPalette(false, 256)
106 |     im:paletteCopy(previm)
107 | 
108 |     -- write new GIF frame, no local palette, starting from left-top, 7ms delay
109 |     im:gifAnimAdd(gif_filename, false, 0, 0, 7, gd.DISPOSAL_NONE)
110 |     -- remember previous screen for optimal compression
111 |     previm = im
112 | 
113 |     -- write best Q-value for state to CSV file
114 |     csv_file:write(action_index .. ';' .. agent.bestq .. ';' .. reward .. ';' .. tostring(terminal) .. '\n')
115 | end
116 | 
117 | -- end GIF animation and close CSV file
118 | gd.gifAnimEnd(gif_filename)
119 | csv_file:close()
120 | 
121 | print("Finished playing, close window to exit!")
122 | 
123 | 


--------------------------------------------------------------------------------
/extract_data:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]; then
 4 |   echo "Please provide the name of the game, e.g.  $0 breakout [version]"; exit 0
 5 | fi
 6 | 
 7 | agenta_file="DQN3_0_1_"$1"_FULL_Y_A"
 8 | agentb_file="DQN3_0_1_"$1"_FULL_Y_B"
 9 | 
10 | if [ "$2" ]; then
11 |   agenta_file=$agenta_file"_"$2
12 |   agentb_file=$agentb_file"_"$2
13 | fi
14 | 
15 | agenta_file=$agenta_file".t7"
16 | agentb_file=$agentb_file".t7"
17 | 
18 | cd dqn
19 | ../torch/bin/luajit extract_data.lua $agenta_file >${1}_history_A.csv
20 | ../torch/bin/luajit extract_data.lua $agentb_file >${1}_history_B.csv
21 | 
22 | 


--------------------------------------------------------------------------------
/install_dependencies.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | ######################################################################
  4 | # Torch install
  5 | ######################################################################
  6 | 
  7 | 
  8 | TOPDIR=$PWD
  9 | 
 10 | # Prefix:
 11 | PREFIX=$PWD/torch
 12 | echo "Installing Torch into: $PREFIX"
 13 | 
 14 | if [[ `uname` != 'Linux' ]]; then
 15 |   echo 'Platform unsupported, only available for Linux'
 16 |   exit
 17 | fi
 18 | if [[ `which apt-get` == '' ]]; then
 19 |     echo 'apt-get not found, platform not supported'
 20 |     exit
 21 | fi
 22 | 
 23 | # Install dependencies for Torch:
 24 | sudo apt-get update
 25 | sudo apt-get install -qqy build-essential
 26 | sudo apt-get install -qqy gcc g++
 27 | sudo apt-get install -qqy cmake
 28 | sudo apt-get install -qqy curl
 29 | sudo apt-get install -qqy libreadline-dev
 30 | sudo apt-get install -qqy git-core
 31 | sudo apt-get install -qqy libjpeg-dev
 32 | sudo apt-get install -qqy libpng-dev
 33 | sudo apt-get install -qqy ncurses-dev
 34 | sudo apt-get install -qqy imagemagick
 35 | sudo apt-get install -qqy unzip
 36 | sudo apt-get install -qqy libqt4-dev
 37 | sudo apt-get update
 38 | 
 39 | 
 40 | echo "==> Torch7's dependencies have been installed"
 41 | 
 42 | 
 43 | 
 44 | 
 45 | 
 46 | # Build and install Torch7
 47 | mkdir -p $PREFIX/src
 48 | cd $PREFIX/src
 49 | rm -rf luajit-rocks
 50 | git clone https://github.com/torch/luajit-rocks.git
 51 | cd luajit-rocks
 52 | mkdir -p build
 53 | cd build
 54 | git checkout master; git pull
 55 | rm -f CMakeCache.txt
 56 | cmake .. -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_BUILD_TYPE=Release
 57 | RET=$?; if [ $RET -ne 0 ]; then echo "Error. Exiting."; exit $RET; fi
 58 | make
 59 | RET=$?; if [ $RET -ne 0 ]; then echo "Error. Exiting."; exit $RET; fi
 60 | make install
 61 | RET=$?; if [ $RET -ne 0 ]; then echo "Error. Exiting."; exit $RET; fi
 62 | 
 63 | 
 64 | path_to_nvcc=$(which nvcc)
 65 | if [ -x "$path_to_nvcc" ]
 66 | then
 67 |     cutorch=ok
 68 |     cunn=ok
 69 | fi
 70 | 
 71 | # Install base packages:
 72 | $PREFIX/bin/luarocks install cwrap
 73 | $PREFIX/bin/luarocks install paths
 74 | $PREFIX/bin/luarocks install torch
 75 | $PREFIX/bin/luarocks install nn
 76 | 
 77 | [ -n "$cutorch" ] && \
 78 | ($PREFIX/bin/luarocks install cutorch)
 79 | [ -n "$cunn" ] && \
 80 | ($PREFIX/bin/luarocks install cunn)
 81 | 
 82 | $PREFIX/bin/luarocks install luafilesystem
 83 | $PREFIX/bin/luarocks install penlight
 84 | $PREFIX/bin/luarocks install sys
 85 | $PREFIX/bin/luarocks install xlua
 86 | $PREFIX/bin/luarocks install image
 87 | $PREFIX/bin/luarocks install env
 88 | $PREFIX/bin/luarocks install qtlua
 89 | $PREFIX/bin/luarocks install qttorch
 90 | $PREFIX/bin/luarocks install luagd
 91 | 
 92 | echo ""
 93 | echo "=> Torch7 has been installed successfully"
 94 | echo ""
 95 | 
 96 | 
 97 | echo "Installing nngraph ... "
 98 | $PREFIX/bin/luarocks install nngraph
 99 | RET=$?; if [ $RET -ne 0 ]; then echo "Error. Exiting."; exit $RET; fi
100 | echo "nngraph installation completed"
101 | 
102 | echo "Installing Xitari ... "
103 | cd $PREFIX/src
104 | rm -rf xitari
105 | git clone https://github.com/NeuroCSUT/Xitari2Player.git xitari
106 | cd xitari
107 | $PREFIX/bin/luarocks make
108 | RET=$?; if [ $RET -ne 0 ]; then echo "Error. Exiting."; exit $RET; fi
109 | echo "Xitari installation completed"
110 | 
111 | echo "Installing Alewrap ... "
112 | cd $PREFIX/src
113 | rm -rf alewrap
114 | git clone https://github.com/NeuroCSUT/Alewrap2Player.git alewrap
115 | cd alewrap
116 | $PREFIX/bin/luarocks make
117 | RET=$?; if [ $RET -ne 0 ]; then echo "Error. Exiting."; exit $RET; fi
118 | echo "Alewrap installation completed"
119 | 
120 | echo "Installing Lua-GD ... "
121 | cd $PREFIX/src
122 | rm -rf lua-gd
123 | git clone https://github.com/ittner/lua-gd.git
124 | cd lua-gd
125 | sed -i 's/LUABIN=lua5.1/LUABIN=..\/..\/bin\/luajit/' Makefile
126 | sed -i 's/`pkg-config \$(LUAPKG) --cflags`/-I..\/..\/include/' Makefile
127 | $PREFIX/bin/luarocks make
128 | RET=$?; if [ $RET -ne 0 ]; then echo "Error. Exiting."; exit $RET; fi
129 | echo "Lua-GD installation completed"
130 | 
131 | echo
132 | echo "You can run experiments by executing: "
133 | echo
134 | echo "   ./run_cpu game_name"
135 | echo
136 | echo "            or   "
137 | echo
138 | echo "   ./run_gpu game_name"
139 | echo
140 | echo "For this you need to provide the rom files of the respective games (game_name.bin) in the roms/ directory"
141 | echo
142 | 
143 | 


--------------------------------------------------------------------------------
/plot_2results:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" -o -z "$2" ]
 4 |   then echo "Please provide the name of the game, e.g.  $0 breakout <version>"; exit 0
 5 | fi
 6 | 
 7 | agent_type="DQN3_0_1"
 8 | agent_name=$agent_type"_"$1"_FULL_Y_A_"$2
 9 | agent_nameB=$agent_type"_"$1"_FULL_Y_B_"$2
10 | network_file="$agent_name.t7"
11 | network_fileB="$agent_nameB.t7"
12 | 
13 | cd dqn
14 | ../torch/bin/luajit plot_results.lua $network_file '1'
15 | ../torch/bin/luajit plot_results.lua $network_fileB '2'
16 | 


--------------------------------------------------------------------------------
/plot_results:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  $0 breakout [version]"; exit 0
 5 | fi
 6 | 
 7 | agent_type="DQN3_0_1"
 8 | 
 9 | agent_name=$agent_type"_"$1"_FULL_Y"
10 | 
11 | if [ "$2" ]
12 |   then agent_name=$agent_name"_"$2
13 | fi
14 | 
15 | cd dqn
16 | ../torch/bin/luajit plot_results.lua $network_file
17 | 
18 | 


--------------------------------------------------------------------------------
/plots/Pong2Player0.csv:
--------------------------------------------------------------------------------
 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB
 2 | 49;1;113;144;31;40596;-10;-21
 3 | 49;2;83;110;26;54180;-16;-10
 4 | 49;3;123;150;33;50904;-13;-20
 5 | 49;4;87;118;28;53392;-14;-14
 6 | 49;5;86;119;32;41276;-11;-21
 7 | 49;6;68;95;22;55760;-11;-11
 8 | 49;7;107;131;30;39684;-9;-21
 9 | 49;8;103;138;31;41120;-10;-21
10 | 49;9;119;134;30;50664;-9;-21
11 | 49;10;106;132;32;41612;-11;-21
12 | 


--------------------------------------------------------------------------------
/plots/Pong2Player025.csv:
--------------------------------------------------------------------------------
 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB
 2 | 49;1;100;178;35;38308;-24,5;-19,25
 3 | 49;2;84;126;28;52024;-21,25;-13,75
 4 | 49;3;83;151;30;50204;-21;-16,5
 5 | 49;4;87;153;27;51320;-18,75;-15
 6 | 49;5;117;169;32;47916;-23,75;-16,25
 7 | 49;6;63;103;21;54852;-15,75;-10,5
 8 | 49;7;76;150;28;50944;-17,5;-17,5
 9 | 49;8;88;158;29;49432;-18,5;-17,75
10 | 49;9;81;133;23;52328;-17,75;-11
11 | 49;10;101;161;32;49024;-21,5;-18,5
12 | 


--------------------------------------------------------------------------------
/plots/Pong2Player025_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2Player025_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player025_history_A.csv:
--------------------------------------------------------------------------------
 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
 2 | 1,-16.175,568,20,-0.039221713870764,0.029145060969517,0
 3 | 2,-5.25,1808,86,-0.08608803910017,0.024464469596744,2145.1178991795
 4 | 3,-7.89,595,25,-0.16516388082504,0.036043447315693,5047.8407013416
 5 | 4,-17.1375,563,20,-0.21209482467175,0.024275738954544,7975.1648442745
 6 | 5,-13.725,559,20,-0.25724805343151,0.023471528768539,10931.119163036
 7 | 6,-18.09375,882,32,-0.28720419490337,0.039458050966263,13900.719771862
 8 | 7,-5.25,1807,86,-0.29875303316116,0.035284322023392,16899.678385973
 9 | 8,-5.9309210526316,1643,76,-0.30827509558201,0.027181980490685,19909.909010172
10 | 9,-17.652173913043,554,23,-0.34414884364605,0.023285815060139,22939.618853092
11 | 10,-24.25,493,14,-0.45101404255629,0.026651228666306,25973.788203239
12 | 11,-17.8,566,20,-0.52390958654881,0.029277494549751,29014.436220407
13 | 12,-18.814814814815,729,27,-0.54987085461617,0.039004881381989,32054.2854774
14 | 13,-22.7625,569,20,-0.54759183239937,0.037574471831322,35099.126308441
15 | 14,-21.826086956522,573,23,-0.5414869966507,0.031009385406971,38136.861510515
16 | 15,-23.363636363636,687,22,-0.53737711763382,0.029484157204628,41182.874017715
17 | 16,-21.93,647,25,-0.55663867473602,0.028866673767567,44221.490203857
18 | 17,-20.539473684211,672,19,-0.55051146292686,0.029257072925568,47265.278861761
19 | 18,-21.64,597,25,-0.5600018157959,0.028754609167576,50302.363017797
20 | 19,-22.836538461538,755,26,-0.528419293046,0.032014802247286,53345.915400743
21 | 20,-19.066666666667,500,15,-0.55494042682648,0.039873561859131,56382.528666735
22 | 21,-23.796875,554,16,-0.60025869548321,0.029821652859449,59421.726590633
23 | 22,-21.967391304348,581,23,-0.60584501338005,0.032700836449862,62458.533436537
24 | 23,-22.736111111111,504,18,-0.61955435323715,0.033058366805315,65503.937287807
25 | 24,-24.214285714286,528,14,-0.56297829425335,0.035921178489923,68542.928196669
26 | 25,-21.807692307692,463,13,-0.56452477037907,0.03643785020709,71586.370691776
27 | 26,-17.3,492,15,-0.51090985023975,0.035625722199678,74622.337754726
28 | 27,-20.346153846154,444,13,-0.43836925184727,0.03375756187737,77666.558803558
29 | 28,-22.55,399,10,-0.43404640996456,0.031462731868029,80709.149359703
30 | 29,-23.2,396,10,-0.41233416175842,0.035647051796317,83754.154893875
31 | 30,-23.431818181818,415,11,-0.38006203866005,0.032858549267054,86794.799460888
32 | 31,-23.125,403,10,-0.35445768916607,0.031531541958451,89839.270530939
33 | 32,-23,391,10,-0.30434643936157,0.034153116598725,92875.915857077
34 | 33,-22.75,396,10,-0.28986469483376,0.035212990507483,95919.116505146
35 | 34,-24.575,382,10,-0.26612943577766,0.032227400070056,98955.592324972
36 | 35,-23.25,392,13,-0.26001595187187,0.033032483366318,102001.58251119
37 | 36,-24.8,382,10,-0.27397701132298,0.041171750120819,105041.78489709
38 | 37,-24.229166666667,409,12,-0.25113397979736,0.033362254701555,108088.16415405
39 | 38,-22.725,389,10,-0.26228211903572,0.032622256435454,111126.96911716
40 | 39,-23.840909090909,367,11,-0.23275652563572,0.032861182622612,114174.376688
41 | 40,-24.472222222222,367,9,-0.23278466582298,0.028766998458654,117212.66692805
42 | 41,-24.333333333333,353,9,-0.23536169910431,0.031785745821893,120277.53845716
43 | 42,-22.75,365,9,-0.23552758347988,0.029502598330379,123339.66571617
44 | 43,-24.25,364,10,-0.26656704473495,0.032324158191681,126401.14022613
45 | 44,-23.575,352,10,-0.23555584084988,0.028791207253933,129460.99070621
46 | 45,-24.3,354,10,-0.21958861577511,0.032881412982941,132517.0496223
47 | 46,-23.875,352,10,-0.21235245406628,0.032139036647975,135574.30851746
48 | 47,-23.5,370,10,-0.19236077666283,0.029558058961295,138640.52359223
49 | 48,-24.5,333,9,-0.20597528707981,0.029206026017666,141701.27262425
50 | 49,-24.6,354,10,-0.2066180062294,0.032127544768155,144765.86236525
51 | 50,-24.472222222222,329,9,-0.20180555462837,0.030132573431358,147825.13783622
52 | 51,-24.916666666667,368,9,-0.17858661842346,0.031521749011008,150881.76423216
53 | 52,-22.416666666667,358,9,-0.18114964962006,0.028377236567438,153922.41813803
54 | 53,-23.5,344,10,-0.18467670333385,0.028995035981759,156972.89826202
55 | 54,-23.277777777778,344,9,-0.1676502161026,0.029776763111586,160014.81617689
56 | 55,-24.083333333333,341,9,-0.1743116055727,0.027832619239576,163061.01360178
57 | 56,-22.388888888889,333,9,-0.16487859320641,0.027677313060616,166101.79607463
58 | 57,-22.96875,331,8,-0.1488944196701,0.027799856112804,169148.1457777
59 | 58,-24.166666666667,343,9,-0.13687021172047,0.027575396563159,172210.88137054
60 | 59,-23.111111111111,346,9,-0.15149549734592,0.02638567374018,175278.51135159
61 | 60,-23.4375,334,8,-0.14469087707996,0.028253893780871,178342.54405665
62 | 61,-23.75,329,9,-0.15923279857635,0.028194830140797,181408.60506177
63 | 62,-22.8125,330,8,-0.13924461603165,0.02555070837657,184469.28358388
64 | 63,-25.194444444444,342,9,-0.10861786818504,0.026180025865324,187535.17608976
65 | 64,-24.5,326,8,-0.092891018033028,0.030780095353723,190594.06337309
66 | 65,-24.333333333333,338,9,-0.086945027470589,0.03013668980822,193661.88482189
67 | 66,-23.625,322,8,-0.10515263080597,0.029240502325818,196724.74574399
68 | 67,-23.888888888889,327,9,-0.097472587585449,0.026558110132813,199790.20792198
69 | 68,-24.166666666667,326,9,-0.08769202375412,0.030075371234678,202857.40764117
70 | 69,-23.1875,325,8,-0.089009696483612,0.030123330419883,205922.43129015
71 | 70,-24.375,349,10,-0.076255015015602,0.031035634227097,208987.50090623
72 | 71,-23.825,367,10,-0.082351405143738,0.026536399340257,212053.49057627
73 | 72,-22.222222222222,328,9,-0.10591663193703,0.027814963047858,215114.73465633
74 | 73,-24.5,326,8,-0.10250155234337,0.027341711520217,218183.40867519
75 | 74,-22.59375,303,8,-0.10880286765099,0.026504224833101,221227.51655221
76 | 75,-23.90625,324,8,-0.10862943220139,0.027639281807467,224292.29700541
77 | 76,-23.28125,304,8,-0.12024115896225,0.026453432897106,227351.26816535
78 | 77,-23.65625,293,8,-0.12196779179573,0.029779008532409,230415.81299853
79 | 78,-22.03125,313,8,-0.11778739881516,0.027597792163957,233474.80665731
80 | 79,-21.777777777778,318,9,-0.1045371119976,0.029160832708701,236540.2464354
81 | 80,-24.65625,311,8,-0.099472584724426,0.029115197545849,239600.92933631
82 | 81,-21.90625,312,8,-0.087682006239891,0.02894111315161,242665.77772045
83 | 82,-21.96875,311,8,-0.082646775841713,0.024405353136361,245729.7993753
84 | 83,-22.78125,309,8,-0.075779815554619,0.027971854389645,248799.43783331
85 | 84,-23.722222222222,316,9,-0.066599355697632,0.025790769956075,251859.77453947
86 | 85,-24.28125,311,8,-0.039750346183777,0.026809580738656,254927.8234973
87 | 86,-22.09375,302,8,-0.052736337065697,0.025766940789297,257985.59742713
88 | 87,-17.472222222222,327,9,-0.041633208632469,0.024616640799097,261037.21990418
89 | 88,-22.71875,307,8,-0.033747999787331,0.025986618170049,264102.1203053
90 | 89,-19.96875,307,8,-0.018715925335884,0.025599762066267,267167.29295135
91 | 90,-24.25,308,8,-0.016242628574371,0.029913896901067,270228.33542633
92 | 91,-24.025,348,10,-0.014869958758354,0.027084466501838,273296.45201826
93 | 92,-24.5,309,8,-0.018914625287056,0.02361085549579,276342.35304213
94 | 93,-21.75,304,8,-0.0033151375055313,0.024955295423511,279391.21635818
95 | 94,-23.90625,304,8,-0.0086528804302216,0.026489283834584,282433.16600633
96 | 


--------------------------------------------------------------------------------
/plots/Pong2Player025_history_B.csv:
--------------------------------------------------------------------------------
 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
 2 | 1,-18.3875,568,20,-0.047463739871979,0.019119021207094,0
 3 | 2,-21,1808,86,-0.11923271918297,0.017456383809447,2145.1178991795
 4 | 3,-21.36,595,25,-0.18175252383947,0.017800595074892,5047.8407013416
 5 | 4,-17.3625,563,20,-0.24600265741348,0.017269527733326,7975.1648442745
 6 | 5,-20.2125,559,20,-0.28691787457466,0.034651648104191,10931.119163036
 7 | 6,-15.7734375,882,32,-0.30191461646557,0.034897827267647,13900.719771862
 8 | 7,-21,1807,86,-0.31138520777225,0.037016086220741,16899.678385973
 9 | 8,-21.059210526316,1643,76,-0.27793198502064,0.024718726277351,19909.909010172
10 | 9,-12.402173913043,554,23,-0.32790268707275,0.029424214139581,22939.618853092
11 | 10,-19.589285714286,493,14,-0.43140547662973,0.040308959618211,25973.788203239
12 | 11,-17.3875,566,20,-0.42355911284685,0.03875673687458,29014.436220407
13 | 12,-14.425925925926,729,27,-0.37147549843788,0.036230552509427,32054.2854774
14 | 13,-12.3,569,20,-0.40821820640564,0.03606599791348,35099.126308441
15 | 14,-8.554347826087,573,23,-0.45022543692589,0.047203338369727,38136.861510515
16 | 15,-14.875,687,22,-0.42852363598347,0.040189081296325,41182.874017715
17 | 16,-8.97,647,25,-0.48244062685966,0.036945415109396,44221.490203857
18 | 17,-21.368421052632,672,19,-0.48765191411972,0.035779230415821,47265.278861761
19 | 18,-7.81,597,25,-0.50453666257858,0.038317348569632,50302.363017797
20 | 19,-12.596153846154,755,26,-0.54132361984253,0.034977702558041,53345.915400743
21 | 20,-20.516666666667,500,15,-0.54874300312996,0.034060227304697,56382.528666735
22 | 21,-17.140625,554,16,-0.58508399951458,0.037577494502068,59421.726590633
23 | 22,-9.1195652173913,581,23,-0.51264390897751,0.037798419475555,62458.533436537
24 | 23,-12.194444444444,504,18,-0.50770425796509,0.037071759358048,65503.937287807
25 | 24,-20.517857142857,528,14,-0.44087818443775,0.0358794843629,68542.928196669
26 | 25,-22.038461538462,463,13,-0.41854939472675,0.035565362289548,71586.370691776
27 | 26,-21.7,492,15,-0.37197616314888,0.033765547022223,74622.337754726
28 | 27,-21.961538461538,444,13,-0.30863987386227,0.037052399717271,77666.558803558
29 | 28,-22.7,399,10,-0.30608900702,0.035842268519104,80709.149359703
30 | 29,-24.925,396,10,-0.31908070909977,0.037388368692249,83754.154893875
31 | 30,-21.795454545455,415,11,-0.33511826062202,0.03828813894093,86794.799460888
32 | 31,-23.875,403,10,-0.31773330914974,0.043416036142036,89839.270530939
33 | 32,-22.625,391,10,-0.29725316429138,0.037307736651972,92875.915857077
34 | 33,-23.875,396,10,-0.27369178593159,0.037329317059368,95919.116505146
35 | 34,-20.675,382,10,-0.23598987996578,0.039341125553474,98955.592324972
36 | 35,-14.25,392,13,-0.23570065188408,0.0370699272817,102001.58251119
37 | 36,-22.7,382,10,-0.22717701399326,0.0369602432549,105041.78489709
38 | 37,-18.166666666667,409,12,-0.20643604362011,0.033634546946734,108088.16415405
39 | 38,-21.9,389,10,-0.19385940217972,0.040584698152728,111126.96911716
40 | 39,-16.613636363636,367,11,-0.17805164909363,0.036598243892659,114174.376688
41 | 40,-21.222222222222,367,9,-0.16340972304344,0.0363655543942,117212.66692805
42 | 41,-21.916666666667,353,9,-0.13660684776306,0.03570253569819,120277.53845716
43 | 42,-22.666666666667,365,9,-0.12664770591259,0.030618733669631,123339.66571617
44 | 43,-18.625,364,10,-0.11581337034702,0.035453151194379,126401.14022613
45 | 44,-16.675,352,10,-0.13645259201527,0.032029403009452,129460.99070621
46 | 45,-19.575,354,10,-0.11208348071575,0.039308571979403,132517.0496223
47 | 46,-18.25,352,10,-0.077307417631149,0.034906720773317,135574.30851746
48 | 47,-20.5,370,10,-0.067142282962799,0.032025041517802,138640.52359223
49 | 48,-20.083333333333,333,9,-0.050315158486366,0.032454478519969,141701.27262425
50 | 49,-19.65,354,10,-0.029398714721203,0.032754215477034,144765.86236525
51 | 50,-19.555555555556,329,9,-0.061822235167027,0.037998179838527,147825.13783622
52 | 51,-21.333333333333,368,9,-0.081514114260674,0.031815447958186,150881.76423216
53 | 52,-23,358,9,-0.058856061398983,0.034865049999207,153922.41813803
54 | 53,-18.625,344,10,-0.065357940196991,0.035583984130528,156972.89826202
55 | 54,-23.111111111111,344,9,-0.081974079191685,0.033413144109771,160014.81617689
56 | 55,-19.25,341,9,-0.097408960103989,0.032794755496085,163061.01360178
57 | 56,-23.305555555556,333,9,-0.088108561575413,0.03458730529435,166101.79607463
58 | 57,-24.375,331,8,-0.099771278560162,0.035198075203225,169148.1457777
59 | 58,-22.5,343,9,-0.11109308534861,0.032190475354437,172210.88137054
60 | 59,-22.027777777778,346,9,-0.080554970800877,0.032436789471656,175278.51135159
61 | 60,-23.4375,334,8,-0.079262801647186,0.029142876271158,178342.54405665
62 | 61,-21.25,329,9,-0.082398749470711,0.034636469038203,181408.60506177
63 | 62,-23.28125,330,8,-0.063544670343399,0.03176398887625,184469.28358388
64 | 63,-22.027777777778,342,9,-0.066933371722698,0.031416806509718,187535.17608976
65 | 64,-21.125,326,8,-0.054739053189754,0.030579170981422,190594.06337309
66 | 65,-19.833333333333,338,9,-0.05774846214056,0.030011123730801,193661.88482189
67 | 66,-22.78125,322,8,-0.081116970300674,0.02639879387524,196724.74574399
68 | 67,-19.722222222222,327,9,-0.068454475164413,0.030450439533219,199790.20792198
69 | 68,-19.583333333333,326,9,-0.045518710792065,0.029313153597061,202857.40764117
70 | 69,-22.4375,325,8,-0.064951929867268,0.028022845759057,205922.43129015
71 | 70,-18.75,349,10,-0.043183997392654,0.028989451053552,208987.50090623
72 | 71,-20.3,367,10,-0.025602949619293,0.029179670328042,212053.49057627
73 | 72,-23.055555555556,328,9,-0.041780498087406,0.030262483103201,215114.73465633
74 | 73,-21.59375,326,8,-0.065926621258259,0.026754813395441,218183.40867519
75 | 74,-22.875,303,8,-0.062578474581242,0.026043576397933,221227.51655221
76 | 75,-22.03125,324,8,-0.073804795444012,0.029437688516453,224292.29700541
77 | 76,-22.34375,304,8,-0.067518562853336,0.027564628321677,227351.26816535
78 | 77,-21.5,293,8,-0.068931222259998,0.029230595620349,230415.81299853
79 | 78,-23.4375,313,8,-0.078792982280254,0.028379039938562,233474.80665731
80 | 79,-22.111111111111,318,9,-0.055398044526577,0.029704592891037,236540.2464354
81 | 80,-20.34375,311,8,-0.058394014656544,0.026572366565699,239600.92933631
82 | 81,-24.34375,312,8,-0.042984465479851,0.029344257420395,242665.77772045
83 | 82,-23.65625,311,8,-0.032085248291492,0.026987537696492,245729.7993753
84 | 83,-24.09375,309,8,-0.026669816911221,0.028634731254075,248799.43783331
85 | 84,-19.055555555556,316,9,-0.023739550888538,0.027961498648685,251859.77453947
86 | 85,-20.25,311,8,-0.02610385787487,0.028155951410998,254927.8234973
87 | 86,-24.625,302,8,-0.036636304974556,0.029357400156558,257985.59742713
88 | 87,-24.055555555556,327,9,-0.041067724645138,0.029178894826677,261037.21990418
89 | 88,-22.90625,307,8,-0.023426564395428,0.028285281694261,264102.1203053
90 | 89,-24.09375,307,8,-0.02995936101675,0.028055407757871,267167.29295135
91 | 90,-19.1875,308,8,-0.037269575536251,0.027009723492898,270228.33542633
92 | 91,-17.35,348,10,-0.051642109036446,0.028042263409123,273296.45201826
93 | 92,-22.0625,309,8,-0.035263738811016,0.027735443314305,276342.35304213
94 | 93,-21.84375,304,8,-0.015153986394405,0.028466824442614,279391.21635818
95 | 94,-19.6875,304,8,-0.015177275419235,0.026481435511378,282433.16600633
96 | 


--------------------------------------------------------------------------------
/plots/Pong2Player025_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2Player025_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player025_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2Player025_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_time.png


--------------------------------------------------------------------------------
/plots/Pong2Player025p.csv:
--------------------------------------------------------------------------------
 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB
 2 | 49;1;177;156;32;28400;-5,75;-18,25
 3 | 49;2;226;191;38;45716;-15,5;-13
 4 | 49;3;212;181;39;45024;-16,5;-12,75
 5 | 49;4;159;155;32;40044;-5,75;-18,25
 6 | 49;5;165;138;38;44924;-11,75;-16,75
 7 | 49;6;136;119;29;28016;-2,75;-19
 8 | 49;7;150;129;33;36964;-6,75;-18
 9 | 49;8;188;180;39;32700;-12,75;-16,5
10 | 49;9;160;160;34;46008;-7,75;-17,75
11 | 49;10;170;162;36;38864;-9,75;-17,25
12 | 


--------------------------------------------------------------------------------
/plots/Pong2Player025p_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2Player025p_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player025p_history_A.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-12.352941176471,547,17,0.00082937029004097,0.027333959957585,0
  3 | 2,3.28,568,25,-0.015955855160952,0.024399156646803,1853.2487101555
  4 | 3,-10.745,1587,50,-0.04002983725071,0.023911268580705,4738.9991998672
  5 | 4,-13.666666666667,576,21,-0.021115227997303,0.027273571142927,7648.6803650856
  6 | 5,4.9786585365854,1740,82,-0.052541925430298,0.026042224757373,10568.621224165
  7 | 6,-2.2045454545455,567,22,-0.047908736944199,0.028969800889492,13514.659698248
  8 | 7,-14.1484375,923,32,-0.050299361824989,0.028472188197076,16481.173532248
  9 | 8,1,582,24,-0.022397070169449,0.02070509136375,19472.213679314
 10 | 9,-19.891304347826,586,23,-0.041784821867943,0.024664424210321,22485.965064049
 11 | 10,-14.4375,567,20,-0.063855166316032,0.030764868427068,25520.983683109
 12 | 11,-19.011363636364,553,22,-0.09504049217701,0.03342449611146,28549.404037952
 13 | 12,-16.329545454545,726,22,-0.12252291846275,0.03835753168771,31593.315760136
 14 | 13,-9.8425925925926,850,27,-0.12895621919632,0.034671363378526,34623.699748039
 15 | 14,-6.1666666666667,517,15,-0.14886118376255,0.032452720825095,37662.579718828
 16 | 15,-17.038461538462,728,26,-0.13114638340473,0.033712715710048,40696.674022913
 17 | 16,-19,566,19,-0.19647386515141,0.03248389304895,43748.12767005
 18 | 17,-19.913265306122,1244,49,-0.24450411760807,0.035744567394257,46811.069835186
 19 | 18,-17.763157894737,1005,38,-0.29554573619366,0.03675818355009,49869.176441431
 20 | 19,-19.454545454545,598,22,-0.27713277065754,0.056914875906426,52920.243164301
 21 | 20,-14.986111111111,714,18,-0.28785525941849,0.061606936244294,55978.05390048
 22 | 21,-12.513157894737,704,19,-0.31740967047215,0.040825152235106,59030.078924179
 23 | 22,-19.875,1139,44,-0.31587430310249,0.041830532593653,62078.087513208
 24 | 23,-11.973684210526,709,19,-0.30709554183483,0.036709113527089,65080.992939234
 25 | 24,-10.342105263158,598,19,-0.33734118914604,0.040537785271183,68080.805202246
 26 | 25,-6.3214285714286,929,28,-0.27032305586338,0.037287141215988,71091.365906477
 27 | 26,-16.111111111111,696,18,-0.21350250959396,0.036020501473453,74456.818608284
 28 | 27,-16.039473684211,691,19,-0.20027073895931,0.037381986066233,77459.906303406
 29 | 28,-18.794642857143,847,28,-0.16577923822403,0.036279377653729,80472.901323557
 30 | 29,-16.321428571429,519,14,-0.16614373016357,0.036354737143032,83495.177422523
 31 | 30,-17.575,686,20,-0.1629597299099,0.04528989072144,86509.107819557
 32 | 31,-11.5,427,11,-0.13156876277924,0.037213211272843,89516.892535448
 33 | 32,-11.416666666667,439,12,-0.12611813902855,0.035730781964026,92509.944949389
 34 | 33,-15.692307692308,493,13,-0.11401822388172,0.034237996558892,95515.364245415
 35 | 34,-16.692307692308,500,13,-0.050775911331177,0.037290098330937,98538.464844465
 36 | 35,-10.134615384615,490,13,0.011082951903343,0.035221740954556,101545.67380142
 37 | 36,-13.3125,489,12,0.05101346039772,0.033417437257245,104565.05976629
 38 | 37,-15.522727272727,415,11,0.05477943277359,0.035748205714393,107557.31085825
 39 | 38,-11.769230769231,449,13,0.03531555891037,0.030885013681836,110563.57991529
 40 | 39,-12.308823529412,643,17,0.0051241598129272,0.030626845588908,113553.20760727
 41 | 40,-15.340909090909,439,11,0.024405554294586,0.034930394763709,116573.10384536
 42 | 41,-9.55,539,15,0.015237793087959,0.031570149709005,119602.37818432
 43 | 42,-13.388888888889,360,9,0.020310340762138,0.033570738155628,122643.66008639
 44 | 43,-11.791666666667,481,12,0.044084054112434,0.03250115283113,125665.90826011
 45 | 44,-12,420,11,0.064705698490143,0.033147096188273,128680.62530828
 46 | 45,-14.068181818182,407,11,0.068985550761223,0.033349006434903,131705.14903736
 47 | 46,-14.525,372,10,0.051267961621284,0.032088694971055,134712.71475434
 48 | 47,-18.1875,396,12,0.050818180918694,0.050975416313857,137733.19209051
 49 | 48,-13.444444444444,374,9,0.061889111042023,0.034130853615701,140749.8140893
 50 | 49,-14.925,380,10,0.042458220124245,0.034601580746938,143770.8537302
 51 | 50,-13.527777777778,346,9,0.024435671687126,0.028946696706116,146801.87147307
 52 | 51,-15.25,478,12,0.080733409762383,0.030970462821424,149838.03255987
 53 | 52,-17,374,10,0.10193769836426,0.031945135511458,152843.86857486
 54 | 53,-16.625,361,10,0.093968224525452,0.031251499648206,155836.19525099
 55 | 54,-15.75,374,10,0.097057873010635,0.034328001841903,158878.77326703
 56 | 55,-15.636363636364,413,11,0.11512637388706,0.030982962438837,161907.93762589
 57 | 56,-15.95,359,10,0.13147314918041,0.029474855260924,164959.00357485
 58 | 57,-13.65,376,10,0.11861356890202,0.031835000259802,167964.01353598
 59 | 58,-14.916666666667,360,9,0.095459250211716,0.032202778627165,170963.46770382
 60 | 59,-14.3,374,10,0.073190112948418,0.033053754236549,173959.79004407
 61 | 60,-17.175,359,10,0.071609259605408,0.035708082264289,176995.46294403
 62 | 61,-19.216666666667,429,15,0.033061665534973,0.036125382841565,180024.85727596
 63 | 62,-13.181818181818,419,11,0.071844058156013,0.032730900298804,183063.08219004
 64 | 63,-14.3,364,10,0.075431159377098,0.035262108405586,186085.26321888
 65 | 64,-12.8,364,10,0.12851213681698,0.030993033098057,189114.62298989
 66 | 65,-11.625,365,10,0.12246531331539,0.026319333478808,192137.97750092
 67 | 66,-12.694444444444,361,9,0.10268555891514,0.033918607313186,195184.31896782
 68 | 67,-13.138888888889,330,9,0.11241964411736,0.031062320400029,198208.45438385
 69 | 68,-10.027777777778,314,9,0.11970028889179,0.030205767504871,201264.16876793
 70 | 69,-17.15,361,10,0.10617531645298,0.030443864509463,204330.46117401
 71 | 70,-13.611111111111,343,9,0.13672581923008,0.030963398275897,207367.36512089
 72 | 71,-10.916666666667,340,9,0.13866924703121,0.02808295147866,210394.67483687
 73 | 72,-13.111111111111,335,9,0.15276759397984,0.03073479475826,213406.28694057
 74 | 73,-14,330,9,0.14818157732487,0.030045662116259,216401.18172574
 75 | 74,-11.65,366,10,0.15149701762199,0.0306292267479,219403.89151478
 76 | 75,-14.78125,318,8,0.14722499918938,0.032621489584446,222408.20259905
 77 | 76,-13.527777777778,340,9,0.15238605070114,0.028705019278452,225461.34652686
 78 | 77,-16.625,324,8,0.15269427335262,0.027108968123794,228506.12891603
 79 | 78,-11.9375,319,8,0.14510065102577,0.02813640839234,231499.46051288
 80 | 79,-14.611111111111,357,9,0.1547291328907,0.030465177953243,234527.91388178
 81 | 80,-14.09375,320,8,0.16062089204788,0.028384334482253,237533.10557556
 82 | 81,-13.5,461,12,0.16015643727779,0.029461187843233,240559.18904567
 83 | 82,-15.388888888889,326,9,0.15006931102276,0.029733455169946,243585.51751566
 84 | 83,-8.25,374,10,0.16304328477383,0.028653175026178,246613.00346756
 85 | 84,-15,404,10,0.16597410941124,0.030505876637995,249631.14078665
 86 | 85,-14.777777777778,363,9,0.1793319426775,0.028244167033583,252641.4623425
 87 | 86,-12.818181818182,403,11,0.19172694313526,0.026959577117115,255642.5714035
 88 | 87,-13.75,333,8,0.17769673538208,0.02880460199574,258670.40927029
 89 | 88,-13.027777777778,355,9,0.17875989842415,0.027611443854868,261682.58066964
 90 | 89,-14.555555555556,329,9,0.17230091142654,0.026754371542484,264715.41334033
 91 | 90,-12.25,335,9,0.18951486063004,0.03095425176248,267752.74594522
 92 | 91,-13.7,387,10,0.22818215501308,0.032679638968781,270781.42519116
 93 | 92,-12.416666666667,338,9,0.227643846035,0.027799513872713,273799.21518421
 94 | 93,-13.35,374,10,0.23371605634689,0.030105194956064,276822.62628102
 95 | 94,-16.275,363,10,0.23556252193451,0.029598393268883,279846.54929209
 96 | 95,-16.1875,317,8,0.24673552370071,0.028603928070515,282891.81562114
 97 | 96,-13.555555555556,362,9,0.23167600369453,0.028245551455766,285943.53358817
 98 | 97,-15.35,400,10,0.21881948363781,0.028914291147143,288987.05976105
 99 | 98,-11.444444444444,349,9,0.23058039593697,0.027175322085619,292021.26357698
100 | 99,-12.975,368,10,0.22368267905712,0.026793881300837,295034.41267991
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player025p_history_B.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-11.25,547,17,-0.022026317223907,0.020198165625334,0
  3 | 2,-20.17,568,25,-0.04641988992691,0.013739513330162,1853.2487101555
  4 | 3,-12.595,1587,50,-0.064600818932056,0.029137924715877,4738.9991998672
  5 | 4,-6.5833333333333,576,21,-0.051100649297237,0.023595076441765,7648.6803650856
  6 | 5,-20.875,1740,82,-0.045074989318848,0.022099993273616,10568.621224165
  7 | 6,-17.090909090909,567,22,-0.016678276002407,0.019567097578198,13514.659698248
  8 | 7,-7.390625,923,32,-0.013433264136314,0.035526426170021,16481.173532248
  9 | 8,-18.6875,582,24,-0.047428324580193,0.024294663522393,19472.213679314
 10 | 9,0.81521739130435,586,23,-0.11416205596924,0.025039088575053,22485.965064049
 11 | 10,-6.1875,567,20,-0.12096249431372,0.035912665607873,25520.983683109
 12 | 11,0.875,553,22,-0.11538840609789,0.032637817841955,28549.404037952
 13 | 12,-7.125,726,22,-0.12101293635368,0.03908227335359,31593.315760136
 14 | 13,-13.407407407407,850,27,-0.14093002921343,0.041807466906495,34623.699748039
 15 | 14,-18.083333333333,517,15,-0.20034552669525,0.056665569891222,37662.579718828
 16 | 15,-3.3846153846154,728,26,-0.265248480618,0.044714272328652,40696.674022913
 17 | 16,-2.75,566,19,-0.25556861174107,0.049150885151234,43748.12767005
 18 | 17,0.9030612244898,1244,49,-0.24537132835388,0.045838969773613,46811.069835186
 19 | 18,-1.875,1005,38,-0.20086278235912,0.049287679118104,49869.176441431
 20 | 19,-0.93181818181818,598,22,-0.15542751729488,0.04646459036693,52920.243164301
 21 | 20,-13.180555555556,714,18,-0.15927488678694,0.046686877326109,55978.05390048
 22 | 21,-14.486842105263,704,19,-0.16698158144951,0.044650202055927,59030.078924179
 23 | 22,0.75,1139,44,-0.093899932324886,0.043945835517719,62078.087513208
 24 | 23,-14.671052631579,709,19,-0.062830573558807,0.045137236217037,65080.992939234
 25 | 24,-12.710526315789,598,19,-0.033643757283688,0.045228893704712,68080.805202246
 26 | 25,-17.973214285714,929,28,0.014084318518639,0.047282483488321,71091.365906477
 27 | 26,-11.805555555556,696,18,0.048644723296165,0.045284945465624,74456.818608284
 28 | 27,-11.039473684211,691,19,0.14962560451031,0.041659969978034,77459.906303406
 29 | 28,-3.3035714285714,847,28,0.091666147172451,0.041126533688977,80472.901323557
 30 | 29,-10.517857142857,519,14,0.092228712022305,0.04186979227839,83495.177422523
 31 | 30,-7.5125,686,20,0.12615469282866,0.038390983216465,86509.107819557
 32 | 31,-16.045454545455,427,11,0.16480913150311,0.038114958232269,89516.892535448
 33 | 32,-15.895833333333,439,12,0.17521106117964,0.040174690205604,92509.944949389
 34 | 33,-12.519230769231,493,13,0.19621461397409,0.044320241136476,95515.364245415
 35 | 34,-10.826923076923,500,13,0.16478013241291,0.03882261980325,98538.464844465
 36 | 35,-17.153846153846,490,13,0.14321630263329,0.041511953729205,101545.67380142
 37 | 36,-15.5,489,12,0.11221461379528,0.04190587349236,104565.05976629
 38 | 37,-11.886363636364,415,11,0.098011219799519,0.037842316889204,107557.31085825
 39 | 38,-13.788461538462,449,13,0.1094850730896,0.036203425718471,110563.57991529
 40 | 39,-14.514705882353,643,17,0.096934471487999,0.037548641140573,113553.20760727
 41 | 40,-12.613636363636,439,11,0.12856424659491,0.035460023349151,116573.10384536
 42 | 41,-17.3,539,15,0.14773236596584,0.034919066805858,119602.37818432
 43 | 42,-14.777777777778,360,9,0.13221014523506,0.035461233332753,122643.66008639
 44 | 43,-16.583333333333,481,12,0.1067987023592,0.031970723539591,125665.90826011
 45 | 44,-15.409090909091,420,11,0.13014798927307,0.035180839031935,128680.62530828
 46 | 45,-13.272727272727,407,11,0.096371849060059,0.035471203010529,131705.14903736
 47 | 46,-12.4,372,10,0.12734847903252,0.034236836878583,134712.71475434
 48 | 47,-6,396,12,0.1212975230813,0.030816859727725,137733.19209051
 49 | 48,-15.805555555556,374,9,0.1186571495533,0.035329016719013,140749.8140893
 50 | 49,-13.425,380,10,0.086061958432198,0.0307808713587,143770.8537302
 51 | 50,-15.055555555556,346,9,0.090420981049538,0.032709428630769,146801.87147307
 52 | 51,-12.4375,478,12,0.11497986483574,0.031309688353911,149838.03255987
 53 | 52,-10.375,374,10,0.11617774283886,0.032193318966776,152843.86857486
 54 | 53,-10.375,361,10,0.1357577880621,0.036922869921662,155836.19525099
 55 | 54,-10.875,374,10,0.14001373767853,0.039279997609556,158878.77326703
 56 | 55,-11.772727272727,413,11,0.13920881605148,0.030879472235218,161907.93762589
 57 | 56,-10.45,359,10,0.13384447109699,0.034293823663145,164959.00357485
 58 | 57,-14.4,376,10,0.12579261171818,0.032977014446631,167964.01353598
 59 | 58,-12.833333333333,360,9,0.13741466021538,0.034957044512033,170963.46770382
 60 | 59,-13.675,374,10,0.13554467487335,0.032587719732895,173959.79004407
 61 | 60,-9.675,359,10,0.1283846218586,0.03228870106861,176995.46294403
 62 | 61,-1.8833333333333,429,15,0.10752998292446,0.033881811052561,180024.85727596
 63 | 62,-14.431818181818,419,11,0.13602916538715,0.031183257451281,183063.08219004
 64 | 63,-12.55,364,10,0.1411640816927,0.035830748300999,186085.26321888
 65 | 64,-13.675,364,10,0.13390322732925,0.03039492443949,189114.62298989
 66 | 65,-15,365,10,0.16171702218056,0.031493642576039,192137.97750092
 67 | 66,-16.305555555556,361,9,0.14895478498936,0.028515401256271,195184.31896782
 68 | 67,-14.111111111111,330,9,0.14906409704685,0.034287901156582,198208.45438385
 69 | 68,-15.722222222222,314,9,0.16527233350277,0.031774870052934,201264.16876793
 70 | 69,-9.025,361,10,0.1847889149189,0.0319969650805,204330.46117401
 71 | 70,-14.305555555556,343,9,0.18708206498623,0.029576429188251,207367.36512089
 72 | 71,-16.75,340,9,0.19180698692799,0.028971399366856,210394.67483687
 73 | 72,-14.222222222222,335,9,0.23490184652805,0.034007699653506,213406.28694057
 74 | 73,-12.75,330,9,0.22783434331417,0.029517960537225,216401.18172574
 75 | 74,-14.15,366,10,0.2203871024847,0.029430883809924,219403.89151478
 76 | 75,-13.0625,318,8,0.22859213721752,0.029511279068887,222408.20259905
 77 | 76,-13.805555555556,340,9,0.20844653385878,0.027854178383946,225461.34652686
 78 | 77,-10.84375,324,8,0.20289381372929,0.02774667654559,228506.12891603
 79 | 78,-16.46875,319,8,0.16040451908112,0.030247389111668,231499.46051288
 80 | 79,-12.805555555556,357,9,0.14438503944874,0.030913321133703,234527.91388178
 81 | 80,-13.9375,320,8,0.16219390559196,0.032817014321685,237533.10557556
 82 | 81,-13.5,461,12,0.1669005920887,0.029075888812542,240559.18904567
 83 | 82,-10.944444444444,326,9,0.14216573596001,0.028492409624159,243585.51751566
 84 | 83,-17.625,374,10,0.17459906148911,0.029121529079974,246613.00346756
 85 | 84,-13.125,404,10,0.16748613035679,0.028448160957545,249631.14078665
 86 | 85,-12.138888888889,363,9,0.17306335294247,0.029834900505841,252641.4623425
 87 | 86,-13.840909090909,403,11,0.18577182674408,0.03069648785796,255642.5714035
 88 | 87,-14.375,333,8,0.16826687574387,0.027692098695785,258670.40927029
 89 | 88,-14.972222222222,355,9,0.15338295567036,0.030362812533975,261682.58066964
 90 | 89,-12.611111111111,329,9,0.15686451637745,0.031322851724923,264715.41334033
 91 | 90,-14.75,335,9,0.17532988417149,0.028876138456166,267752.74594522
 92 | 91,-13.075,387,10,0.14850094485283,0.029779071172699,270781.42519116
 93 | 92,-14.5,338,9,0.14405781364441,0.026694667607546,273799.21518421
 94 | 93,-13.35,374,10,0.17320177900791,0.028948133006692,276822.62628102
 95 | 94,-10.275,363,10,0.18066636753082,0.027836792185903,279846.54929209
 96 | 95,-12.125,317,8,0.20752841413021,0.027108559463173,282891.81562114
 97 | 96,-13.694444444444,362,9,0.18776108336449,0.027645866759121,285943.53358817
 98 | 97,-13.225,400,10,0.18295483493805,0.028122058689594,288987.05976105
 99 | 98,-15.472222222222,349,9,0.1813165242672,0.026570522945374,292021.26357698
100 | 99,-14.1,368,10,0.16999678957462,0.030394074514508,295034.41267991
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player025p_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2Player025p_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player025p_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2Player025p_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_time.png


--------------------------------------------------------------------------------
/plots/Pong2Player05.csv:
--------------------------------------------------------------------------------
 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB
 2 | 49;1;72;188;31;47872;-23,5;-23
 3 | 49;2;65;184;31;47860;-24;-22,5
 4 | 49;3;76;165;27;49880;-20,5;-20
 5 | 49;4;73;185;29;48308;-22,5;-21
 6 | 49;5;72;227;36;44252;-28;-26
 7 | 49;6;42;120;20;54200;-14,5;-15,5
 8 | 49;7;81;188;31;48052;-23,5;-23
 9 | 49;8;65;186;30;48032;-22,5;-22,5
10 | 49;9;57;152;24;51260;-19,5;-16,5
11 | 49;10;86;205;31;46752;-24,5;-22
12 | 


--------------------------------------------------------------------------------
/plots/Pong2Player05_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2Player05_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player05_history_A.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-23.565789473684,1060,38,-0.067710862189531,0.034837675005198,0
  3 | 2,-22.15,567,20,-0.15684062993526,0.038721079081297,2135.1606602669
  4 | 3,-21.365384615385,773,26,-0.25446475175023,0.042217464238405,5015.7156863213
  5 | 4,-10.5,1810,86,-0.30862446391582,0.046876634538174,7923.8975212574
  6 | 5,-11.36,556,25,-0.35418861603737,0.043869868814945,10845.98303318
  7 | 6,-13.795454545455,553,22,-0.35598142755032,0.036427813351154,13779.444419146
  8 | 7,-14.739130434783,545,23,-0.38907803559303,0.038076591491699,16730.656761169
  9 | 8,-10.571428571429,1786,84,-0.40729899406433,0.038895908117294,19701.781215191
 10 | 9,-12.2,1143,50,-0.50314592260122,0.041101832091808,22691.036135197
 11 | 10,-17.723684210526,1062,38,-0.50624308091402,0.035921704351902,25693.220480204
 12 | 11,-23.466666666667,906,30,-0.54614569222927,0.045043568909168,28695.295013189
 13 | 12,-23.625,542,20,-0.60700052964687,0.035269764125347,31699.539398193
 14 | 13,-13.863636363636,544,22,-0.61513235259056,0.033319144189358,34705.274416924
 15 | 14,-23.578947368421,503,19,-0.59529578441381,0.031388927638531,37707.174799919
 16 | 15,-27.769230769231,473,13,-0.58331776094437,0.034846514195204,40709.99874115
 17 | 16,-23.35,530,20,-0.57454408812523,0.04202400124073,43711.185202837
 18 | 17,-23.333333333333,547,21,-0.58036920952797,0.035804619640112,46715.193037987
 19 | 18,-26.153846153846,473,13,-0.55386888027191,0.031593176573515,49718.601107836
 20 | 19,-23.607142857143,507,14,-0.61488328480721,0.039815872758627,52720.651567936
 21 | 20,-29.727272727273,454,11,-0.5656794731617,0.039149179130793,55723.797693014
 22 | 21,-25.791666666667,455,12,-0.54490308320522,0.036643647104502,58725.664307117
 23 | 22,-24.115384615385,465,13,-0.5066512928009,0.040177868783474,61731.177309275
 24 | 23,-26.409090909091,417,11,-0.4840882229805,0.039173430681229,64735.67158699
 25 | 24,-29.045454545455,425,11,-0.45775239634514,0.037823288202286,67730.783874989
 26 | 25,-27.181818181818,421,11,-0.47300417554379,0.038609686613083,70763.969098806
 27 | 26,-25.884615384615,458,13,-0.45065103256702,0.038901566147804,73791.41166997
 28 | 27,-27.35,371,10,-0.50865966272354,0.048145369470119,76837.669150829
 29 | 28,-28.409090909091,400,11,-0.47566920948029,0.038639131039381,79881.43487668
 30 | 29,-26.272727272727,413,11,-0.47156949782372,0.044437561452389,82939.544976711
 31 | 30,-27.7,405,10,-0.44191358315945,0.037687044411898,85980.046920538
 32 | 31,-27.95,413,10,-0.4227322665453,0.038398827344179,89025.800366402
 33 | 32,-27.954545454545,403,11,-0.44173110306263,0.036418485343456,92063.302292347
 34 | 33,-28.75,387,10,-0.43116731095314,0.035739500701427,95107.170580387
 35 | 34,-27.25,397,10,-0.4147837843895,0.03450074160099,98144.554581642
 36 | 35,-27.590909090909,383,11,-0.41931443274021,0.034484442532063,101187.45636582
 37 | 36,-28.083333333333,433,12,-0.41124797463417,0.037311921685934,104224.58697462
 38 | 37,-28.409090909091,410,11,-0.40026402807236,0.036159102261066,107265.20277381
 39 | 38,-27.95,392,10,-0.38983296847343,0.037148823037744,110301.78797388
 40 | 39,-27.5,395,11,-0.38342036378384,0.035200391098857,113344.96769667
 41 | 40,-26.727272727273,379,11,-0.41330251276493,0.04031700232625,116381.36457777
 42 | 41,-28.333333333333,357,9,-0.38927046716213,0.033651781499386,119425.28358245
 43 | 42,-26.318181818182,376,11,-0.38498378384113,0.035465506106615,122464.9944644
 44 | 43,-28.1,372,10,-0.37079898560047,0.036857142180204,125509.42432022
 45 | 44,-26.45,351,10,-0.3712386251688,0.034880570158362,128547.34342337
 46 | 45,-28.8,385,10,-0.35417822515965,0.033587526723742,131589.36552644
 47 | 46,-26.909090909091,368,11,-0.32902699255943,0.03582143689692,134627.12757063
 48 | 47,-27.65,362,10,-0.34593545436859,0.036205421105027,137668.57984638
 49 | 48,-27.05,371,10,-0.31687743508816,0.034691168367863,140704.32613134
 50 | 49,-27.5,348,10,-0.32036348569393,0.036744026735425,143746.68808866
 51 | 50,-28.222222222222,337,9,-0.30661095154285,0.032667124561965,146784.51735163
 52 | 51,-27.15,339,10,-0.28461887812614,0.033706795692444,149827.91408563
 53 | 52,-26.055555555556,328,9,-0.25844291603565,0.033171407248825,152864.17974257
 54 | 53,-28.055555555556,337,9,-0.20742945981026,0.03217628205684,155907.42605758
 55 | 54,-26.85,347,10,-0.22025959646702,0.034844072170556,158945.75263953
 56 | 55,-29.333333333333,369,9,-0.19502455461025,0.034335268455092,161993.8620522
 57 | 56,-27.611111111111,334,9,-0.19381985235214,0.03531576907495,165028.32910323
 58 | 57,-28.3125,338,8,-0.20123857879639,0.033415464806138,168068.62531328
 59 | 58,-27.222222222222,347,9,-0.21830435657501,0.032280782226182,171106.50689602
 60 | 59,-28.5,367,9,-0.1951986335516,0.033280624641455,174149.95089102
 61 | 60,-27.277777777778,342,9,-0.19836646234989,0.029407955253031,177187.28591776
 62 | 61,-27.45,343,10,-0.21108313941956,0.03160761183966,180230.49803877
 63 | 62,-27.15,348,10,-0.20018597757816,0.033951800141484,183269.95338082
 64 | 63,-27.555555555556,318,9,-0.18895139217377,0.03070913996594,186314.83098292
 65 | 64,-28,318,8,-0.18499350523949,0.029948033646564,189352.4879508
 66 | 65,-27.8125,319,8,-0.19815232717991,0.030979598897276,192396.61688399
 67 | 66,-29,331,8,-0.2042354580164,0.034979876468889,195436.35396481
 68 | 67,-25.777777777778,340,9,-0.18630485057831,0.029006195977097,198481.27222991
 69 | 68,-27.222222222222,347,9,-0.16837375974655,0.031752511370927,201521.16469193
 70 | 69,-27.666666666667,343,9,-0.17215054941177,0.032913396080025,204565.77418089
 71 | 70,-27.666666666667,326,9,-0.16846440696716,0.030616780133918,207606.91884899
 72 | 71,-28.388888888889,350,9,-0.16197844016552,0.02940061276406,210652.6123538
 73 | 72,-28.3125,331,8,-0.16286637437344,0.030343113770243,213691.91739488
 74 | 73,-28.166666666667,322,9,-0.17059023880959,0.028513457268826,216737.55602193
 75 | 74,-28.4375,306,8,-0.16656371164322,0.029049847560003,219778.28505278
 76 | 75,-28.555555555556,355,9,-0.17663783371449,0.028326682368293,222824.99759769
 77 | 76,-25.5625,308,8,-0.16812364423275,0.028427452790784,225863.61633468
 78 | 77,-28,313,8,-0.17012423825264,0.029179202558938,228906.90767956
 79 | 78,-25.6,330,10,-0.15948258709908,0.029502675250173,231945.58412576
 80 | 79,-27.15,340,10,-0.17163393580914,0.031709079549648,234990.06086373
 81 | 80,-28.9375,309,8,-0.17488246464729,0.029555166413484,238032.13610172
 82 | 81,-28.357142857143,305,7,-0.16523126077652,0.028102440752089,241078.97931576
 83 | 82,-28.055555555556,349,9,-0.17386763191223,0.029353871364699,244120.02995491
 84 | 83,-28.375,313,8,-0.19983051693439,0.031149187764386,247165.05484152
 85 | 84,-26.625,307,8,-0.17532221388817,0.029301581036765,250204.44619155
 86 | 85,-25.75,291,8,-0.17028263640404,0.028546538640745,253247.87504554
 87 | 86,-27.714285714286,287,7,-0.16522393465042,0.031246546869166,256287.07864952
 88 | 87,-26.5,289,8,-0.15511447227001,0.033023809224367,259332.19374633
 89 | 88,-28.125,304,8,-0.15494792342186,0.029552731960081,262373.06913757
 90 | 89,-24.5625,286,8,-0.14808536505699,0.027394249363802,265416.93225241
 91 | 90,-27.5,292,8,-0.1341061425209,0.028037310602143,268457.4805975
 92 | 91,-28.0625,287,8,-0.132041872859,0.028428069008514,271504.65070438
 93 | 92,-29.5,293,7,-0.13780510485172,0.029911585349124,274544.11319828
 94 | 93,-28.357142857143,291,7,-0.12779329800606,0.028778882302344,277592.01316142
 95 | 94,-26.5,292,8,-0.12557261633873,0.028597356364131,280631.97059441
 96 | 95,-24.5,274,7,-0.13175105500221,0.029224281116389,283677.72791409
 97 | 96,-27,296,8,-0.11238371729851,0.028369416657835,286719.95823526
 98 | 97,-27.625,285,8,-0.10372178995609,0.029483026944101,289765.93972826
 99 | 98,-28.5,276,7,-0.10349846041203,0.02689660487324,292806.88323212
100 | 99,-27.714285714286,275,7,-0.089309795498848,0.030658142995089,295852.95120811
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player05_history_B.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-18.118421052632,1060,38,-0.066835069596767,0.02591877129674,0
  3 | 2,-19.25,567,20,-0.18020111098886,0.02074752292037,2135.1606602669
  4 | 3,-23,773,26,-0.23922270989418,0.02354782563448,5015.7156863213
  5 | 4,-21,1810,86,-0.27857883024216,0.033000697731972,7923.8975212574
  6 | 5,-21.28,556,25,-0.33738064062595,0.022831210553646,10845.98303318
  7 | 6,-22.613636363636,553,22,-0.36825464832783,0.035995476484299,13779.444419146
  8 | 7,-20.478260869565,545,23,-0.37289713126421,0.021897726356983,16730.656761169
  9 | 8,-21.035714285714,1786,84,-0.43349884229898,0.038497683167458,19701.781215191
 10 | 9,-21.85,1143,50,-0.62161118167639,0.033715693116188,22691.036135197
 11 | 10,-23.842105263158,1062,38,-0.63965305072069,0.046260520637035,25693.220480204
 12 | 11,-21.433333333333,906,30,-0.64699254095554,0.053769414335489,28695.295013189
 13 | 12,-15.75,542,20,-0.66198631811142,0.041308878362179,31699.539398193
 14 | 13,-22.272727272727,544,22,-0.66889968365431,0.043348840415478,34705.274416924
 15 | 14,-15.657894736842,503,19,-0.67822147250175,0.034765207916498,37707.174799919
 16 | 15,-24.038461538462,473,13,-0.69069452720881,0.044168668866158,40709.99874115
 17 | 16,-15.2,530,20,-0.62808234894276,0.043116836935282,43711.185202837
 18 | 17,-15.166666666667,547,21,-0.61980935025215,0.058740401506424,46715.193037987
 19 | 18,-27.5,473,13,-0.60524166983366,0.044296518594027,49718.601107836
 20 | 19,-27.178571428571,507,14,-0.55743600380421,0.039298461139202,52720.651567936
 21 | 20,-30,454,11,-0.54200337249041,0.047728110402822,55723.797693014
 22 | 21,-27.333333333333,455,12,-0.53479717481136,0.046402426108718,58725.664307117
 23 | 22,-27.807692307692,465,13,-0.59352023053169,0.044323350936174,61731.177309275
 24 | 23,-27.181818181818,417,11,-0.57141543602943,0.048805070489645,64735.67158699
 25 | 24,-28.363636363636,425,11,-0.55112496232986,0.046088153421879,67730.783874989
 26 | 25,-28.454545454545,421,11,-0.52232072466612,0.043629713505507,70763.969098806
 27 | 26,-25.461538461538,458,13,-0.51326719427109,0.042730105251074,73791.41166997
 28 | 27,-23.95,371,10,-0.48516096317768,0.039971062466502,76837.669150829
 29 | 28,-26,400,11,-0.47914367777109,0.04138122522831,79881.43487668
 30 | 29,-25.954545454545,413,11,-0.51456713736057,0.04437631753087,82939.544976711
 31 | 30,-28.85,405,10,-0.48131128048897,0.039471878141165,85980.046920538
 32 | 31,-28.9,413,10,-0.41247966814041,0.043670239090919,89025.800366402
 33 | 32,-25.909090909091,403,11,-0.4012852845192,0.03890059760958,92063.302292347
 34 | 33,-26.15,387,10,-0.37383976536989,0.045206182360649,95107.170580387
 35 | 34,-27.2,397,10,-0.34854346996546,0.035068557243794,98144.554581642
 36 | 35,-23.681818181818,383,11,-0.38900647234917,0.035556656509638,101187.45636582
 37 | 36,-24.791666666667,433,12,-0.3510031542182,0.03507110093534,104224.58697462
 38 | 37,-26,410,11,-0.33158691716194,0.036370992548764,107265.20277381
 39 | 38,-26.65,392,10,-0.346440944314,0.036729044564068,110301.78797388
 40 | 39,-26.090909090909,395,11,-0.35682825219631,0.036668726488948,113344.96769667
 41 | 40,-23.318181818182,379,11,-0.31966248720884,0.037916492588818,116381.36457777
 42 | 41,-26,357,9,-0.31109520059824,0.036470890812576,119425.28358245
 43 | 42,-23.045454545455,376,11,-0.29166408789158,0.039427526202053,122464.9944644
 44 | 43,-25.3,372,10,-0.29205604588985,0.035345522232354,125509.42432022
 45 | 44,-21.4,351,10,-0.29813618457317,0.035149823502637,128547.34342337
 46 | 45,-26.4,385,10,-0.28132370436192,0.033947332246229,131589.36552644
 47 | 46,-22.318181818182,368,11,-0.22567233264446,0.034842983233277,134627.12757063
 48 | 47,-23.8,362,10,-0.23975029945374,0.041935967186466,137668.57984638
 49 | 48,-25.75,371,10,-0.21593493235111,0.035218815199565,140704.32613134
 50 | 49,-24.25,348,10,-0.21577809095383,0.034836648333818,143746.68808866
 51 | 50,-27.277777777778,337,9,-0.19347376406193,0.035338229847956,146784.51735163
 52 | 51,-22.8,339,10,-0.18803761428595,0.034529903292656,149827.91408563
 53 | 52,-24.777777777778,328,9,-0.18428690427542,0.035586897883564,152864.17974257
 54 | 53,-26.277777777778,337,9,-0.16982698947191,0.037080030148849,155907.42605758
 55 | 54,-22.8,347,10,-0.1805264787674,0.036383127157111,158945.75263953
 56 | 55,-28,369,9,-0.19619113898277,0.039251363170333,161993.8620522
 57 | 56,-23.722222222222,334,9,-0.16315816771984,0.035299850299722,165028.32910323
 58 | 57,-27.5625,338,8,-0.17962335503101,0.036802242480684,168068.62531328
 59 | 58,-25.444444444444,347,9,-0.18439667832851,0.038928838141263,171106.50689602
 60 | 59,-27,367,9,-0.17241037005186,0.035762713017873,174149.95089102
 61 | 60,-26.055555555556,342,9,-0.16759346574545,0.033813355355524,177187.28591776
 62 | 61,-23.4,343,10,-0.17988516628742,0.035570928469766,180230.49803877
 63 | 62,-24.75,348,10,-0.14266111797094,0.034371860966086,183269.95338082
 64 | 63,-23.944444444444,318,9,-0.16104466897249,0.03449313522689,186314.83098292
 65 | 64,-26.1875,318,8,-0.15914500325918,0.033191479151836,189352.4879508
 66 | 65,-24.5,319,8,-0.16667608141899,0.03099166682642,192396.61688399
 67 | 66,-28.1875,331,8,-0.17711827391386,0.034276712449268,195436.35396481
 68 | 67,-28.222222222222,340,9,-0.1667426995039,0.041464160232805,198481.27222991
 69 | 68,-26.444444444444,347,9,-0.15887207114697,0.03303444356285,201521.16469193
 70 | 69,-24.666666666667,343,9,-0.16351392567158,0.034325236777309,204565.77418089
 71 | 70,-25.333333333333,326,9,-0.16442284065485,0.037677678430919,207606.91884899
 72 | 71,-28.111111111111,350,9,-0.15780285811424,0.033131264870055,210652.6123538
 73 | 72,-26.25,331,8,-0.16163712793589,0.033998891314492,213691.91739488
 74 | 73,-24.833333333333,322,9,-0.15641409653425,0.033916867008433,216737.55602193
 75 | 74,-26.5,306,8,-0.18017208111286,0.034290954723489,219778.28505278
 76 | 75,-27.777777777778,355,9,-0.20218924224377,0.034520228855079,222824.99759769
 77 | 76,-28.0625,308,8,-0.1884888920784,0.03200924059283,225863.61633468
 78 | 77,-25.0625,313,8,-0.19379569894075,0.032447021281347,228906.90767956
 79 | 78,-19.7,330,10,-0.16559742546082,0.032409753373824,231945.58412576
 80 | 79,-22.8,340,10,-0.17583794850111,0.033432859681547,234990.06086373
 81 | 80,-28.4375,309,8,-0.17665959024429,0.031948379773647,238032.13610172
 82 | 81,-29.5,305,7,-0.18224990254641,0.03095829824172,241078.97931576
 83 | 82,-27.777777777778,349,9,-0.18671671444178,0.029398809920065,244120.02995491
 84 | 83,-28.8125,313,8,-0.16162476235628,0.032981981403194,247165.05484152
 85 | 84,-26.25,307,8,-0.16804763782024,0.033460189212114,250204.44619155
 86 | 85,-28.4375,291,8,-0.14077527183294,0.033014024126343,253247.87504554
 87 | 86,-26.928571428571,287,7,-0.17255865395069,0.031409454064909,256287.07864952
 88 | 87,-24.3125,289,8,-0.16105569815636,0.034144075133605,259332.19374633
 89 | 88,-27.1875,304,8,-0.17937077999115,0.030995232569054,262373.06913757
 90 | 89,-27.5625,286,8,-0.17962557536364,0.029845997454599,265416.93225241
 91 | 90,-26.5,292,8,-0.15064305710793,0.032113508773036,268457.4805975
 92 | 91,-24.8125,287,8,-0.1413662314415,0.029414703928109,271504.65070438
 93 | 92,-28.571428571429,293,7,-0.14779610908031,0.032279317857698,274544.11319828
 94 | 93,-28.214285714286,291,7,-0.14539888328314,0.030164272069,277592.01316142
 95 | 94,-27.3125,292,8,-0.13699457347393,0.030221082478762,280631.97059441
 96 | 95,-28,274,7,-0.15508052033186,0.029217592090368,283677.72791409
 97 | 96,-24.375,296,8,-0.16006426233053,0.030165801534429,286719.95823526
 98 | 97,-23.75,285,8,-0.13804284149408,0.033159527854994,289765.93972826
 99 | 98,-29.142857142857,276,7,-0.13101159918308,0.029509387072641,292806.88323212
100 | 99,-24.785714285714,275,7,-0.14010148745775,0.028426378406119,295852.95120811
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player05_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2Player05_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player05_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2Player05_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_time.png


--------------------------------------------------------------------------------
/plots/Pong2Player05p_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2Player05p_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player05p_history_A.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-12.190476190476,566,21,0.0016725579500198,0.029398417342454,0
  3 | 2,-13.184210526316,544,19,0.0090621307790279,0.027209096094593,2521.3481161594
  4 | 3,-4.625,746,24,0.0068334065973759,0.026827841966413,5990.4787950516
  5 | 4,-8.5862068965517,817,29,0.026928184151649,0.028336934078485,9408.9113001823
  6 | 5,8.6041666666667,554,24,0.029909803807735,0.031383067324758,12723.580034971
  7 | 6,-12.421052631579,563,19,0.046930429577827,0.025520376451313,16028.401235104
  8 | 7,-0.078947368421053,552,19,0.072756165266037,0.032294995471835,19352.323368073
  9 | 8,9.6125,1759,80,0.092696217417717,0.034000042662024,22690.247012377
 10 | 9,-10.340909090909,563,22,0.059791999518871,0.0315357653182,26035.510334253
 11 | 10,-13.886363636364,727,22,0.041338154196739,0.050249467393383,29387.56384325
 12 | 11,1.5555555555556,525,18,0.015745346724987,0.047766451098025,32737.631895304
 13 | 12,-0.63157894736842,619,19,-0.0079051206707954,0.040352991356369,36082.453429222
 14 | 13,-6.436170212766,1535,47,0.05316678750515,0.042652126014233,39442.614874363
 15 | 14,-11.119047619048,1401,42,0.038818866789341,0.048164960654452,42395.264083385
 16 | 15,-13.583333333333,1384,42,0.10071529769897,0.048555156112649,45346.400170326
 17 | 16,3.4901960784314,1428,51,0.021332919180393,0.052922020385042,48300.30147934
 18 | 17,-13.119047619048,670,21,0.048792710542679,0.05994572978653,51265.125099182
 19 | 18,-8.7916666666667,825,24,0.15720679599047,0.049980003822595,54222.657976151
 20 | 19,-10.657894736842,705,19,0.16264330422878,0.045774164577946,57176.750576973
 21 | 20,-1.3809523809524,718,21,0.23168902504444,0.079540585922077,60134.317682028
 22 | 21,-0.6551724137931,936,29,0.26441657495499,0.048808562807739,63097.577541113
 23 | 22,-8.8409090909091,835,22,0.28615098077059,0.048952121356502,66048.6204772
 24 | 23,-8.7,765,20,0.3989036950469,0.048634952548891,69002.973978281
 25 | 24,-4.125,580,16,0.4231478330493,0.046354526635259,71961.710373402
 26 | 25,-5.5,612,17,0.44248150074482,0.053494612276554,74946.282803059
 27 | 26,-5.452380952381,759,21,0.45483417850733,0.045858541212976,77999.638302326
 28 | 27,-7.4411764705882,627,17,0.41578766536713,0.041342475254089,81190.814685345
 29 | 28,-7.2,595,15,0.42314593607187,0.039326364312321,84337.967757225
 30 | 29,-12.2,860,25,0.47111334341764,0.040940447449684,87465.261223078
 31 | 30,-4.9285714285714,528,14,0.47050801891088,0.042695259518921,92498.191892147
 32 | 31,-12.133333333333,524,15,0.45293545377254,0.04539662534371,95936.109339952
 33 | 32,-12.875,427,12,0.45350726288557,0.038790578894317,98887.751207113
 34 | 33,-11.142857142857,759,21,0.44122585648298,0.038887200295925,101841.51401019
 35 | 34,-7,550,14,0.44989166086912,0.037162220109254,104789.37532616
 36 | 35,-12.1,560,15,0.44096420383453,0.037667083889246,107743.42333221
 37 | 36,-11.964285714286,545,14,0.41148587912321,0.040263948976994,110687.87245607
 38 | 37,-9.0384615384615,453,13,0.37986992645264,0.040563675947487,113641.68067718
 39 | 38,-9.8,568,15,0.38601289665699,0.036921709775925,116585.00527716
 40 | 39,-7.4285714285714,521,14,0.38462983584404,0.041262718304992,119538.83113694
 41 | 40,-7.6538461538462,491,13,0.35478970843554,0.036659536957741,122487.80203819
 42 | 41,-8.75,401,10,0.36146842372417,0.0384386491552,125440.84892488
 43 | 42,-8.85,383,10,0.3722738249898,0.03783643848449,128385.05406904
 44 | 43,-3.2272727272727,413,11,0.35299725008011,0.035210303295404,131337.15963507
 45 | 44,-8.3571428571429,533,14,0.33448323339224,0.036093774173409,134284.86390018
 46 | 45,-8.6153846153846,489,13,0.37744086462259,0.033056518763304,137272.45820832
 47 | 46,-6.2692307692308,478,13,0.3672527294755,0.033928428951651,140232.80584526
 48 | 47,-6.5,426,11,0.3415291916728,0.033964893162251,143187.39262938
 49 | 48,-10.884615384615,478,13,0.32564502882957,0.036381606638432,146151.68410921
 50 | 49,-7.65625,588,16,0.32519111514091,0.031207777202129,149129.0859623
 51 | 50,-5.0357142857143,503,14,0.30523685032129,0.031699912421405,152100.95436621
 52 | 51,-11.590909090909,410,11,0.28875471013784,0.0325832104648,155060.54105639
 53 | 52,-10.772727272727,397,11,0.27054508757591,0.035913828574121,158014.06233835
 54 | 53,-8.6666666666667,355,9,0.26279150092602,0.03167043646425,160989.72913432
 55 | 54,-12.727272727273,437,11,0.28409675872326,0.031264732986689,163942.92208934
 56 | 55,-10.722222222222,332,9,0.28172516745329,0.036271000921726,166903.69907832
 57 | 56,-9.5555555555556,348,9,0.28384761965275,0.030679102502763,169867.21692038
 58 | 57,-8.1666666666667,429,12,0.28735686922073,0.036207194432616,172831.57644558
 59 | 58,-11.181818181818,415,11,0.29033307236433,0.033343236750923,175790.21458626
 60 | 59,-14.590909090909,375,11,0.30354784888029,0.036958446115255,178745.57849836
 61 | 60,-7.35,415,10,0.2884727704525,0.033673165328801,181704.80793929
 62 | 61,-9.8461538461538,523,13,0.29265542846918,0.032939915321767,184672.56316423
 63 | 62,-11.384615384615,515,13,0.27368097442389,0.033678608834744,187634.86896515
 64 | 63,-10.791666666667,464,12,0.25689987742901,0.029657803095877,190609.77085805
 65 | 64,-9.9583333333333,477,12,0.24411515450478,0.033804434772581,193575.23226714
 66 | 65,-9,444,12,0.23694155526161,0.031689680457115,196543.12847924
 67 | 66,-12.590909090909,430,11,0.24975286895037,0.031562295537442,199508.03181028
 68 | 67,-11.5,467,13,0.26802344965935,0.030717463061213,202485.6055882
 69 | 68,-11.5,396,10,0.27379069811106,0.03399647770822,205457.95847321
 70 | 69,-7.4583333333333,462,12,0.27509512645006,0.030351248666644,208423.81562114
 71 | 70,-3,382,11,0.27350761300325,0.033080709680915,211385.97344613
 72 | 71,-10.4,378,10,0.26220123666525,0.030410955883563,214351.19829679
 73 | 72,-12.291666666667,429,12,0.28032770246267,0.029351569324732,217308.98916483
 74 | 73,-8.0625,331,8,0.28249229198694,0.033711596421897,221972.7158649
 75 | 74,-9.15,368,10,0.29987473875284,0.031458505049348,224964.42784786
 76 | 75,-10.722222222222,332,9,0.30762102860212,0.030753311939538,227937.27384114
 77 | 76,-11.181818181818,419,11,0.30988254141808,0.029774959594011,230895.05806398
 78 | 77,-5.4444444444444,324,9,0.29618969237804,0.034235284376889,233852.17733288
 79 | 78,-13,423,11,0.27776360011101,0.034624876495451,236795.76119375
 80 | 79,-9.3888888888889,339,9,0.2646429989934,0.03197483446449,239746.05166459
 81 | 80,-12.95,356,10,0.26202550524473,0.028922409027815,242697.99571753
 82 | 81,-13.961538461538,452,13,0.25358505940437,0.030119795572013,245649.24792051
 83 | 82,-12.5,398,11,0.2351166793704,0.033767471916974,249845.94104743
 84 | 83,-12.538461538462,471,13,0.24164531832933,0.029401979405433,252805.92880058
 85 | 84,-11.833333333333,335,9,0.24289074194431,0.026997715607285,255753.03083158
 86 | 85,-13.708333333333,449,12,0.23900094258785,0.030221252571791,258703.89991164
 87 | 86,-12.388888888889,339,9,0.2479863730073,0.029699892941862,261655.20484948
 88 | 87,-10.791666666667,474,12,0.26849582207203,0.031834553465247,264607.36318159
 89 | 88,-12,351,9,0.26750654709339,0.034056123487651,267564.47844744
 90 | 89,-11.666666666667,420,12,0.25139537072182,0.032396519025788,270516.86856771
 91 | 90,-10.5,335,9,0.28289089840651,0.033902511775494,273472.08324647
 92 | 91,-10.85,385,10,0.27831519240141,0.030725085824728,276433.87832451
 93 | 92,-6.55,374,10,0.28301594740152,0.033748467750847,279379.32505131
 94 | 93,-13.6,359,10,0.28887090998888,0.032397522479296,282328.62156129
 95 | 94,-9.1818181818182,421,11,0.2910089418292,0.034031861945987,285282.38619661
 96 | 95,-13.277777777778,338,9,0.29555907207727,0.031591658495367,288230.86623335
 97 | 96,-12.318181818182,401,11,0.29896997374296,0.032282556593418,291176.53364658
 98 | 97,-12.958333333333,433,12,0.30080125010014,0.032803089857101,294131.79124165
 99 | 98,-12,365,9,0.29047444790602,0.032203946605325,297084.02317357
100 | 99,-11.3,356,10,0.28700837224722,0.032082269743085,300040.07620072
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player05p_history_B.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-1.1904761904762,566,21,-0.016031626001,0.019111451271921,0
  3 | 2,-0.78947368421053,544,19,-0.015928619652987,0.021086706319824,2521.3481161594
  4 | 3,-10.5,746,24,0.017985911607742,0.023174186084419,5990.4787950516
  5 | 4,-5.3793103448276,817,29,0.035138628304005,0.021087123163044,9408.9113001823
  6 | 5,-19.895833333333,554,24,0.016244427502155,0.020806871149689,12723.580034971
  7 | 6,-2.2368421052632,563,19,0.030516146123409,0.018756742022932,16028.401235104
  8 | 7,-14.131578947368,552,19,0.061523339450359,0.031348906546831,19352.323368073
  9 | 8,-20.55625,1759,80,0.10061441165209,0.026641976721585,22690.247012377
 10 | 9,-2.1590909090909,563,22,0.072689354002476,0.043322645559907,26035.510334253
 11 | 10,-2.4318181818182,727,22,0.044146400034428,0.046073728732765,29387.56384325
 12 | 11,-15.861111111111,525,18,0.040192230582237,0.051530308254063,32737.631895304
 13 | 12,-15.394736842105,619,19,0.0775425760746,0.049443882986903,36082.453429222
 14 | 13,-9.6914893617021,1535,47,0.13757249355316,0.054976292358711,39442.614874363
 15 | 14,-5.4047619047619,1401,42,0.14144300734997,0.051685801447136,42395.264083385
 16 | 15,-2.5119047619048,1384,42,0.17247566401958,0.05379497281462,45346.400170326
 17 | 16,-17.480392156863,1428,51,0.17486859929562,0.052109866876155,48300.30147934
 18 | 17,-2.2619047619048,670,21,0.11819297283888,0.058114196747541,51265.125099182
 19 | 18,-8.2291666666667,825,24,0.17078602439165,0.05748248129338,54222.657976151
 20 | 19,-7.5789473684211,705,19,0.20490509063005,0.063521314557642,57176.750576973
 21 | 20,-15.02380952381,718,21,0.20841271966696,0.064866852200124,60134.317682028
 22 | 21,-14.931034482759,936,29,0.1713255674243,0.064945568051189,63097.577541113
 23 | 22,-9.7272727272727,835,22,0.22036751252413,0.058795872772112,66048.6204772
 24 | 23,-9.825,765,20,0.26435204058886,0.053478279015981,69002.973978281
 25 | 24,-13.6875,580,16,0.28194195884466,0.055717939574271,71961.710373402
 26 | 25,-11.852941176471,612,17,0.31042910403013,0.051862246179953,74946.282803059
 27 | 26,-12.380952380952,759,21,0.30943595552444,0.047173657760024,77999.638302326
 28 | 27,-10.441176470588,627,17,0.323223297894,0.046693474255502,81190.814685345
 29 | 28,-11.4,595,15,0.35061695456505,0.041337524190545,84337.967757225
 30 | 29,-4.82,860,25,0.32666733783484,0.041585235372186,87465.261223078
 31 | 30,-13.071428571429,528,14,0.27311532890797,0.043553937409073,92498.191892147
 32 | 31,-5.3333333333333,524,15,0.24466985583305,0.046891882263124,95936.109339952
 33 | 32,-4.125,427,12,0.27309661978483,0.044171565037221,98887.751207113
 34 | 33,-6.4285714285714,759,21,0.33880872291327,0.041125190071762,101841.51401019
 35 | 34,-11.607142857143,550,14,0.29666004246473,0.04266371575743,104789.37532616
 36 | 35,-6.1,560,15,0.32505999314785,0.039901426881552,107743.42333221
 37 | 36,-6.6071428571429,545,14,0.34931463354826,0.036667717844248,110687.87245607
 38 | 37,-8,453,13,0.34431127136946,0.039972880072892,113641.68067718
 39 | 38,-9,568,15,0.3049511371851,0.0383997557275,116585.00527716
 40 | 39,-10.642857142857,521,14,0.30735584205389,0.037219845436513,119538.83113694
 41 | 40,-9.9615384615385,491,13,0.31245256966352,0.039549522027373,122487.80203819
 42 | 41,-9.5,401,10,0.28530238294601,0.038102595917881,125440.84892488
 43 | 42,-8.55,383,10,0.33918499720097,0.038491429306567,128385.05406904
 44 | 43,-14.136363636364,413,11,0.34722890794277,0.036860883470625,131337.15963507
 45 | 44,-10.392857142857,533,14,0.30450721997023,0.038401826992631,134284.86390018
 46 | 45,-9.4230769230769,489,13,0.31083624172211,0.036519952371716,137272.45820832
 47 | 46,-11.461538461538,478,13,0.31132104611397,0.036799871981144,140232.80584526
 48 | 47,-11.818181818182,426,11,0.3102690179944,0.036717105448246,143187.39262938
 49 | 48,-6.8461538461538,478,13,0.31750721180439,0.033276669070125,146151.68410921
 50 | 49,-10.65625,588,16,0.34079497611523,0.035877283748239,149129.0859623
 51 | 50,-12.75,503,14,0.35767333728075,0.032263810649514,152100.95436621
 52 | 51,-6.1363636363636,410,11,0.35128286981583,0.035170908555388,155060.54105639
 53 | 52,-7.0909090909091,397,11,0.34812305194139,0.033418237604201,158014.06233835
 54 | 53,-9.6666666666667,355,9,0.34887997090816,0.035392617613077,160989.72913432
 55 | 54,-5.6363636363636,437,11,0.32212809282541,0.035544243440032,163942.92208934
 56 | 55,-7.3888888888889,332,9,0.31232571786642,0.034990294538438,166903.69907832
 57 | 56,-8.3888888888889,348,9,0.33573294907808,0.036595604598522,169867.21692038
 58 | 57,-9.6666666666667,429,12,0.32209365475178,0.031012426264584,172831.57644558
 59 | 58,-7.2272727272727,415,11,0.32653800737858,0.034533048078418,175790.21458626
 60 | 59,-1.9090909090909,375,11,0.31488067770004,0.031103349879384,178745.57849836
 61 | 60,-12,415,10,0.29465030092001,0.034411353155971,181704.80793929
 62 | 61,-9.5,523,13,0.28568968135118,0.030104446336627,184672.56316423
 63 | 62,-7.1153846153846,515,13,0.27341676348448,0.032042900033295,187634.86896515
 64 | 63,-7.7916666666667,464,12,0.30227653044462,0.03176537196897,190609.77085805
 65 | 64,-8.3333333333333,477,12,0.31022936862707,0.032570629991591,193575.23226714
 66 | 65,-9.125,444,12,0.29924517524242,0.033795852646232,196543.12847924
 67 | 66,-6.0454545454545,430,11,0.3207828720808,0.032654751021415,199508.03181028
 68 | 67,-6.4230769230769,467,13,0.33412792432308,0.030902617977932,202485.6055882
 69 | 68,-7.3,396,10,0.3365140491128,0.027126253835857,205457.95847321
 70 | 69,-10.208333333333,462,12,0.31692304229736,0.029001407504082,208423.81562114
 71 | 70,-13.772727272727,382,11,0.31260763651133,0.031518132895231,211385.97344613
 72 | 71,-8.3,378,10,0.31329001963139,0.028670164838433,214351.19829679
 73 | 72,-5.0416666666667,429,12,0.31667448115349,0.030446218073368,217308.98916483
 74 | 73,-10.875,331,8,0.31584479349852,0.0295074458085,221972.7158649
 75 | 74,-7.8,368,10,0.3359946603775,0.027101310186088,224964.42784786
 76 | 75,-7.3888888888889,332,9,0.32408956503868,0.028624357804656,227937.27384114
 77 | 76,-6.6818181818182,419,11,0.31864206528664,0.032640161991119,230895.05806398
 78 | 77,-11.777777777778,324,9,0.31573203909397,0.034313679173589,233852.17733288
 79 | 78,-5.2272727272727,423,11,0.30915669256449,0.033906562924385,236795.76119375
 80 | 79,-9.0555555555556,339,9,0.30403732931614,0.033609865434468,239746.05166459
 81 | 80,-4.7,356,10,0.32141356164217,0.032473127380013,242697.99571753
 82 | 81,-3.2307692307692,452,13,0.32835628992319,0.030475623458624,245649.24792051
 83 | 82,-5.2727272727273,398,11,0.32665121507645,0.031192590117455,249845.94104743
 84 | 83,-5.0384615384615,471,13,0.33629900109768,0.029393029239029,252805.92880058
 85 | 84,-5.5,335,9,0.32858689790964,0.03369222028926,255753.03083158
 86 | 85,-3.7083333333333,449,12,0.31140047758818,0.030493652947247,258703.89991164
 87 | 86,-6.2222222222222,339,9,0.32676294052601,0.033686189174652,261655.20484948
 88 | 87,-8.0416666666667,474,12,0.32105655437708,0.031828700378537,264607.36318159
 89 | 88,-6.5,351,9,0.30024907886982,0.035195446297526,267564.47844744
 90 | 89,-5.6666666666667,420,12,0.31623108011484,0.032271535798907,270516.86856771
 91 | 90,-7.3333333333333,335,9,0.31568478637934,0.03311902026087,273472.08324647
 92 | 91,-6.8,385,10,0.30999838203192,0.036625040695071,276433.87832451
 93 | 92,-11.5,374,10,0.33208245170116,0.032671007275581,279379.32505131
 94 | 93,-4.15,359,10,0.31672998613119,0.033254537403584,282328.62156129
 95 | 94,-8.5,421,11,0.34280806851387,0.033647121354938,285282.38619661
 96 | 95,-4.2777777777778,338,9,0.35397489124537,0.034459251075983,288230.86623335
 97 | 96,-5.5,401,11,0.36022583991289,0.032835136532784,291176.53364658
 98 | 97,-4.5833333333333,433,12,0.34793751347065,0.033339282497764,294131.79124165
 99 | 98,-6.6666666666667,365,9,0.34626139050722,0.03423603951931,297084.02317357
100 | 99,-6.35,356,10,0.33870465636253,0.034306661583483,300040.07620072
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player05p_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2Player05p_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player05p_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2Player05p_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_time.png


--------------------------------------------------------------------------------
/plots/Pong2Player075.csv:
--------------------------------------------------------------------------------
 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB
 2 | 49;1;20;1750;6;10620;-5,5;-5
 3 | 49;2;2;1986;1;5032;-0,75;-1
 4 | 49;3;28;1693;8;12160;-7;-7
 5 | 49;4;44;1227;11;25080;-9,25;-10
 6 | 49;5;13;1899;6;6340;-5;-5,5
 7 | 49;6;24;1617;10;14288;-9,25;-8,25
 8 | 49;7;9;1745;4;11664;-3,5;-3,5
 9 | 49;8;63;1247;18;22684;-16,25;-15,25
10 | 49;9;11;1485;5;19376;-4,5;-4,25
11 | 49;10;34;1494;7;17680;-6;-6,25
12 | 


--------------------------------------------------------------------------------
/plots/Pong2Player075_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2Player075_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player075_history_A.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-28.440476190476,674,21,-0.078805467367172,0.039509000293911,0
  3 | 2,-23.939189189189,991,37,-0.19247976750135,0.049804273158312,2115.9324629307
  4 | 3,-24.796875,891,32,-0.3266643422842,0.045056506931782,4963.2593638897
  5 | 4,-27.617647058824,579,17,-0.37178059887886,0.05166561716795,7856.8729798794
  6 | 5,-17.34375,544,24,-0.42195177257061,0.038914656162262,10780.833153009
  7 | 6,-25.392857142857,984,35,-0.44268959724903,0.037266859948635,13695.362179756
  8 | 7,-27.629032258065,952,31,-0.46903250986338,0.043649382352829,16637.670908928
  9 | 8,-22.737704918033,1659,61,-0.61683112418652,0.041099587380886,19597.94380188
 10 | 9,-22.085820895522,1505,67,-0.65997887200117,0.039472180187702,22610.275089979
 11 | 10,-22.977272727273,1054,44,-0.72120541602373,0.042340319633484,26011.902555943
 12 | 11,-21.684782608696,510,23,-0.76017710638046,0.034048043489456,29419.893827915
 13 | 12,-28.8125,550,16,-0.75746470314264,0.035089691400528,32803.928194046
 14 | 13,-29.826923076923,487,13,-0.80500614327192,0.043355384588242,36173.552186966
 15 | 14,-23.808823529412,515,17,-0.81714948797226,0.035739391088486,39532.436095715
 16 | 15,-30.903846153846,475,13,-0.77060514008999,0.040142685949802,42889.226594687
 17 | 16,-29.979166666667,447,12,-0.7682787065506,0.043726075947285,45901.21411252
 18 | 17,-31.727272727273,421,11,-0.70375599229336,0.044094051361084,48914.93513751
 19 | 18,-31.681818181818,410,11,-0.67170395106077,0.042927358448505,51929.802863598
 20 | 19,-33.727272727273,429,11,-0.67993818211555,0.039811842501163,54943.719038486
 21 | 20,-27.910714285714,440,14,-0.58732054197788,0.03741382163763,57958.526197433
 22 | 21,-33.1,397,10,-0.55022555148602,0.040578206688166,60973.126054525
 23 | 22,-31.65,388,10,-0.57079403340816,0.039794300436974,63989.139203548
 24 | 23,-31.045454545455,425,11,-0.5507935795188,0.040159463077784,67004.544019699
 25 | 24,-30.431818181818,401,11,-0.51569227671623,0.037448912441731,70036.117438555
 26 | 25,-32.25,392,10,-0.52514779758453,0.039347346395254,73039.492605686
 27 | 26,-32.25,110,3,-0.47071734595299,0.036341747313738,76062.590397596
 28 | 27,-32.3,383,10,-0.4714185205698,0.036542771577835,79094.062027454
 29 | 28,-32.111111111111,344,9,-0.46518271881342,0.033119259476662,82460.400069475
 30 | 29,-33.5,393,10,-0.46919112837315,0.034872973203659,85488.991984367
 31 | 30,-32.725,372,10,-0.42546955668926,0.035309938177466,88513.200140238
 32 | 31,-30.35,355,10,-0.39419751614332,0.037728601813316,91537.195670128
 33 | 32,-32.175,367,10,-0.35395925176144,0.034827574513853,94553.798403025
 34 | 33,-31.416666666667,324,9,-0.34443300080299,0.033128284499049,97573.81096077
 35 | 34,-31.1,362,10,-0.33986711114645,0.032994185395539,100589.43251872
 36 | 35,-31.944444444444,355,9,-0.34490878891945,0.033036506228149,103614.39954591
 37 | 36,-30.25,358,10,-0.32507938158512,0.03493745534867,106632.72146487
 38 | 37,-31,299,8,-0.29529939067364,0.03263739605248,109651.67803073
 39 | 38,-31.111111111111,322,9,-0.30571579384804,0.033988059701398,112670.15548062
 40 | 39,-30.666666666667,321,9,-0.31798388397694,0.037830376267433,115692.09249473
 41 | 40,-29.75,64,1,-0.31833603322506,0.036371737275273,118718.38890862
 42 | 41,-26.583333333333,200,6,-0.29567375910282,0.034206227421761,121740.25946045
 43 | 42,-28.725,342,10,-0.28755002248287,0.034084396282211,125090.61474347
 44 | 43,-32.1875,310,8,-0.27520984494686,0.034307959004771,128114.22638273
 45 | 44,-30.75,248,6,-0.28520494115353,0.033417053673416,131138.29339767
 46 | 45,-28.722222222222,304,9,-0.24047256863117,0.032669705805369,134158.79221582
 47 | 46,-32.964285714286,281,7,-0.23651483201981,0.034113161490299,137181.63822651
 48 | 47,-30.75,304,8,-0.22929638022184,0.035676180838374,140209.12533069
 49 | 48,-32.027777777778,330,9,-0.22382859092951,0.033306384172756,143231.82737756
 50 | 49,-31.96875,302,8,-0.22882852852345,0.035149923972203,146256.2114768
 51 | 50,-31.875,311,8,-0.19476912009716,0.033764365680981,149283.37731457
 52 | 51,0,26,0,-0.20900189971924,0.036285256405827,152311.60005665
 53 | 52,-30.964285714286,259,7,-0.21334506094456,0.033617449524812,155335.61908579
 54 | 53,-32.15625,306,8,-0.21985206311941,0.035054566949606,158679.64606357
 55 | 54,0,10,0,-0.21830126535892,0.035162898614537,161709.62453771
 56 | 55,0,1,0,-0.21426600271463,0.034257032785332,164731.5665195
 57 | 56,-31.178571428571,258,7,-0.22991574454308,0.033552558975993,168073.77445865
 58 | 57,0,8,0,-0.22385918140411,0.035624685370596,171421.55948472
 59 | 58,0,32,0,-0.2307907345295,0.033521595358849,174889.82973671
 60 | 59,0,30,0,-0.21952018594742,0.037359246272594,178231.51118279
 61 | 60,0,13,0,-0.21459531658888,0.033596971890889,181576.5636518
 62 | 61,-30.5,117,3,-0.22054652988911,0.034634251405951,184920.482651
 63 | 62,-30.5,153,4,-0.21846684992313,0.035553850755794,188261.08853292
 64 | 63,-26.75,41,1,-0.21431094408035,0.03240073265566,191643.86233997
 65 | 64,0,2,0,-0.21656257855892,0.034314641881967,195033.02419901
 66 | 65,-31.333333333333,220,6,-0.21119965481758,0.03203012568265,198387.73912501
 67 | 66,-27.75,86,2,-0.19464588975906,0.03143425309821,201733.42537999
 68 | 67,-35.25,57,1,-0.18309987294674,0.031151129993377,205141.69607902
 69 | 68,0,20,0,-0.17650469058752,0.030899880289566,208501.50281811
 70 | 69,0,29,0,-0.17109624695778,0.031047332327347,211865.68041515
 71 | 70,-31.15,201,5,-0.168033218503,0.031188633478247,215208.4411881
 72 | 71,-29.916666666667,312,9,-0.16163378447294,0.031242613693699,218551.90005112
 73 | 72,0,21,0,-0.18294815135002,0.030757001365069,221943.50013113
 74 | 73,0,9,0,-0.20314860373735,0.031849301447161,225461.31916332
 75 | 74,-28.75,57,1,-0.21980968916416,0.032152000057278,228792.55967522
 76 | 75,-32.428571428571,273,7,-0.23677251183987,0.033057268942706,232123.80669713
 77 | 76,0,4,0,-0.24064756345749,0.032449412482325,235467.32760644
 78 | 77,0,13,0,-0.24053260362148,0.032613435472827,238480.73442841
 79 | 78,0,5,0,-0.26564919984341,0.034383471850306,241808.53344035
 80 | 79,0,17,0,-0.25765567457676,0.03478965630359,245132.98487949
 81 | 80,0,1,0,-0.23240065824986,0.033272572978283,248461.24440837
 82 | 81,0,28,0,-0.24689063930511,0.036688380377833,251784.86009049
 83 | 82,0,4,0,-0.25523908925056,0.036049961570418,255111.13907838
 84 | 83,-32.25,282,7,-0.24346972894669,0.035403252773918,258439.50497341
 85 | 84,0,13,0,-0.26288804399967,0.033645914101973,261762.70466757
 86 | 85,-33,222,5,-0.27623062241077,0.033039603222162,265189.84182572
 87 | 86,0,7,0,-0.298228297472,0.035824844000861,268527.62760568
 88 | 87,0,26,0,-0.31707869124413,0.036494645655155,271539.21241379
 89 | 88,0,26,0,-0.30207984232903,0.033548617534339,274869.65589881
 90 | 89,0,2,0,-0.29758540093899,0.035752976356074,278201.52565455
 91 | 90,-29.75,36,1,-0.29844088804722,0.040411054283381,281534.15359473
 92 | 91,0,15,0,-0.2869536460638,0.035340740567073,284860.0808897
 93 | 92,0,23,0,-0.29461841249466,0.036445504205301,288192.82416868
 94 | 93,0,20,0,-0.28057670366764,0.036911166271195,291518.35870671
 95 | 94,-32.75,65,1,-0.27155382359028,0.036631896108855,294841.96218395
 96 | 95,0,3,0,-0.26560618460178,0.034903613431379,298169.07247496
 97 | 96,0,22,0,-0.26107378935814,0.034363488769159,301519.17273879
 98 | 97,0,20,0,-0.25742229640484,0.033933212923817,304858.42739272
 99 | 98,0,13,0,-0.25535943281651,0.033768573452719,308351.24397063
100 | 


--------------------------------------------------------------------------------
/plots/Pong2Player075_history_B.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-26.22619047619,674,21,-0.070096668124199,0.023073135763407,0
  3 | 2,-21.986486486486,991,37,-0.16645452481508,0.031430733263493,2115.9324629307
  4 | 3,-23.4921875,891,32,-0.26210048544407,0.033524731636047,4963.2593638897
  5 | 4,-28.176470588235,579,17,-0.3241598880887,0.040739152014256,7856.8729798794
  6 | 5,-21.8125,544,24,-0.38103352844715,0.038006673932075,10780.833153009
  7 | 6,-23.357142857143,984,35,-0.39804885518551,0.030070164382458,13695.362179756
  8 | 7,-25.943548387097,952,31,-0.42522195124626,0.039641461074352,16637.670908928
  9 | 8,-24.827868852459,1659,61,-0.48663937526941,0.033724322736263,19597.94380188
 10 | 9,-17.19776119403,1505,67,-0.58310771363974,0.039613855063915,22610.275089979
 11 | 10,-18.386363636364,1054,44,-0.67385898274183,0.047257747024298,26011.902555943
 12 | 11,-16.663043478261,510,23,-0.74345404291153,0.047774670898914,29419.893827915
 13 | 12,-28.71875,550,16,-0.7510941234827,0.047621515154839,32803.928194046
 14 | 13,-31.557692307692,487,13,-0.77566252315044,0.045721424460411,36173.552186966
 15 | 14,-27.044117647059,515,17,-0.7251959322691,0.046316379964352,39532.436095715
 16 | 15,-31.019230769231,475,13,-0.70267062968016,0.047811714947224,42889.226594687
 17 | 16,-30.395833333333,447,12,-0.72241412043571,0.051598012924194,45901.21411252
 18 | 17,-31.909090909091,421,11,-0.64478324902058,0.042378659039736,48914.93513751
 19 | 18,-30.522727272727,410,11,-0.59752578818798,0.047355640590191,51929.802863598
 20 | 19,-32.931818181818,429,11,-0.59042614203691,0.048118498712778,54943.719038486
 21 | 20,-24.964285714286,440,14,-0.61142364943027,0.04725267457962,57958.526197433
 22 | 21,-32,397,10,-0.5925022212863,0.046299857914448,60973.126054525
 23 | 22,-29.95,388,10,-0.58906208235025,0.047253952890635,63989.139203548
 24 | 23,-32.113636363636,425,11,-0.56366939502954,0.042660036683083,67004.544019699
 25 | 24,-31.613636363636,401,11,-0.59884598815441,0.040162801504135,70036.117438555
 26 | 25,-31.1,392,10,-0.56721539533138,0.041649486452341,73039.492605686
 27 | 26,-30.75,110,3,-0.54824591767788,0.039677893638611,76062.590397596
 28 | 27,-31.4,383,10,-0.53941313302517,0.039290070354939,79094.062027454
 29 | 28,-32.055555555556,344,9,-0.52132974845171,0.041671369194984,82460.400069475
 30 | 29,-32.825,393,10,-0.50579402017593,0.040201214283705,85488.991984367
 31 | 30,-32.2,372,10,-0.48678982871771,0.041224533766508,88513.200140238
 32 | 31,-28.275,355,10,-0.39500003600121,0.036619587063789,91537.195670128
 33 | 32,-31.875,367,10,-0.34533633959293,0.040921632256359,94553.798403025
 34 | 33,-29.833333333333,324,9,-0.36864508163929,0.037416964419186,97573.81096077
 35 | 34,-29.45,362,10,-0.36766059172153,0.038218563757837,100589.43251872
 36 | 35,-32.027777777778,355,9,-0.36577108734846,0.038333136588335,103614.39954591
 37 | 36,-28.375,358,10,-0.35177277600765,0.040965191282332,106632.72146487
 38 | 37,-30.03125,299,8,-0.34595155924559,0.036479432418942,109651.67803073
 39 | 38,-29.555555555556,322,9,-0.30786217570305,0.039175052693114,112670.15548062
 40 | 39,-28.638888888889,321,9,-0.3109705504775,0.039023913567886,115692.09249473
 41 | 40,-31.5,64,1,-0.32269009959698,0.037012370828539,118718.38890862
 42 | 41,-28.25,200,6,-0.29259986150265,0.039351981215179,121740.25946045
 43 | 42,-26.05,342,10,-0.31786714351177,0.03637124568969,125090.61474347
 44 | 43,-32.125,310,8,-0.31034964895248,0.037696102849673,128114.22638273
 45 | 44,-32.25,248,6,-0.33015128362179,0.036158583816141,131138.29339767
 46 | 45,-30,304,9,-0.35357726168633,0.03773136805743,134158.79221582
 47 | 46,-32.785714285714,281,7,-0.36206952941418,0.036562707044184,137181.63822651
 48 | 47,-32.03125,304,8,-0.3573007928133,0.036651544630527,140209.12533069
 49 | 48,-30.777777777778,330,9,-0.36903064417839,0.039021208509803,143231.82737756
 50 | 49,-30.59375,302,8,-0.36960311710835,0.034745419502258,146256.2114768
 51 | 50,-31.78125,311,8,-0.35660752868652,0.036782532520592,149283.37731457
 52 | 51,0,26,0,-0.35973270308971,0.039194688305259,152311.60005665
 53 | 52,-31.535714285714,259,7,-0.32979233515263,0.039308880701661,155335.61908579
 54 | 53,-33.03125,306,8,-0.30454251247644,0.038068914060481,158679.64606357
 55 | 54,0,10,0,-0.3077388882637,0.03486764463596,161709.62453771
 56 | 55,0,1,0,-0.287833101511,0.036615976681584,164731.5665195
 57 | 56,-30.071428571429,258,7,-0.28559452927113,0.038736656761728,168073.77445865
 58 | 57,0,8,0,-0.28965576517582,0.03533292925451,171421.55948472
 59 | 58,0,32,0,-0.29404142677784,0.03537861924246,174889.82973671
 60 | 59,0,30,0,-0.27960905748606,0.035781114966609,178231.51118279
 61 | 60,0,13,0,-0.26389364635944,0.035452972461237,181576.5636518
 62 | 61,-30.75,117,3,-0.25413168799877,0.036756916988408,184920.482651
 63 | 62,-32.0625,153,4,-0.24930246031284,0.03570568712987,188261.08853292
 64 | 63,-29.25,41,1,-0.2609234815836,0.034284014912322,191643.86233997
 65 | 64,0,2,0,-0.26833349788189,0.036336692431942,195033.02419901
 66 | 65,-31.083333333333,220,6,-0.2487211471796,0.033628752228804,198387.73912501
 67 | 66,-24.75,86,2,-0.2333458237648,0.032874883798882,201733.42537999
 68 | 67,-34.75,57,1,-0.22106080913544,0.032228405235335,205141.69607902
 69 | 68,0,20,0,-0.2113011238575,0.032315241830423,208501.50281811
 70 | 69,0,29,0,-0.2016722881794,0.032087664677761,211865.68041515
 71 | 70,-30.1,201,5,-0.19333104896545,0.031947336994577,215208.4411881
 72 | 71,-27.638888888889,312,9,-0.18695436799526,0.031283404531889,218551.90005112
 73 | 72,0,21,0,-0.22919175350666,0.031978903968818,221943.50013113
 74 | 73,0,9,0,-0.22746892988682,0.032814852227923,225461.31916332
 75 | 74,-30.75,57,1,-0.24791103869677,0.035097305248724,228792.55967522
 76 | 75,-32.071428571429,273,7,-0.24014590930939,0.035565375929698,232123.80669713
 77 | 76,0,4,0,-0.24898050534725,0.034651560001832,235467.32760644
 78 | 77,0,13,0,-0.27807631742954,0.036324908265844,238480.73442841
 79 | 78,0,5,0,-0.28194893968105,0.039241063005291,241808.53344035
 80 | 79,0,17,0,-0.29142114639282,0.038866422567284,245132.98487949
 81 | 80,0,1,0,-0.30010227823257,0.038893028046004,248461.24440837
 82 | 81,0,28,0,-0.30566997385025,0.038882899645716,251784.86009049
 83 | 82,0,4,0,-0.2768429749012,0.039276440129615,255111.13907838
 84 | 83,-31,282,7,-0.28723658442497,0.037448742699809,258439.50497341
 85 | 84,0,13,0,-0.30693704926968,0.038763760032132,261762.70466757
 86 | 85,-32.1,222,5,-0.28970084834099,0.037739263318479,265189.84182572
 87 | 86,0,7,0,-0.30202293968201,0.037263768468052,268527.62760568
 88 | 87,0,26,0,-0.30429276943207,0.038816785745323,271539.21241379
 89 | 88,0,26,0,-0.29531943643093,0.037614325005561,274869.65589881
 90 | 89,0,2,0,-0.28496080207825,0.039185200965963,278201.52565455
 91 | 90,-31.5,36,1,-0.28107423722744,0.039146922259592,281534.15359473
 92 | 91,0,15,0,-0.26688726651669,0.040104887501802,284860.0808897
 93 | 92,0,23,0,-0.28069443309307,0.037644153595902,288192.82416868
 94 | 93,0,20,0,-0.30239212501049,0.037012866429053,291518.35870671
 95 | 94,-33.75,65,1,-0.29299341022968,0.037025410333648,294841.96218395
 96 | 95,0,3,0,-0.28267125856876,0.035941258675884,298169.07247496
 97 | 96,0,22,0,-0.27609798681736,0.03508983820118,301519.17273879
 98 | 97,0,20,0,-0.27340524804592,0.034850677061826,304858.42739272
 99 | 98,0,13,0,-0.27220576739311,0.034442967168987,308351.24397063
100 | 


--------------------------------------------------------------------------------
/plots/Pong2Player075_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2Player075_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player075_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2Player075_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_time.png


--------------------------------------------------------------------------------
/plots/Pong2Player075p.csv:
--------------------------------------------------------------------------------
 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB
 2 | 49;1;234;314;40;19048;-6,75;-3,25
 3 | 49;2;247;285;41;20716;-4,25;-6
 4 | 49;3;270;322;41;16028;-4,25;-6
 5 | 49;4;276;337;41;20052;-6;-4,25
 6 | 49;5;251;269;40;16840;-6,75;-3,25
 7 | 49;6;250;350;39;19668;-2,25;-7,5
 8 | 49;7;230;286;37;14700;-9;-0,25
 9 | 49;8;232;259;38;22368;-1,25;-8,25
10 | 49;9;207;246;31;29452;5,75;-13,5
11 | 49;10;191;216;32;14024;4,75;-12,75
12 | 


--------------------------------------------------------------------------------
/plots/Pong2Player075p_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2Player075p_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player075p_history_A.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,15.75,1807,86,0.024660635497421,0.030749857582152,0
  3 | 2,15.75,1808,86,0.051130523443222,0.030327398702502,3080.0637950897
  4 | 3,10.659090909091,556,22,0.068492970630527,0.02893841022253,6112.7202789783
  5 | 4,8.0729166666667,591,24,0.088593688845634,0.030321074590087,9164.36353302
  6 | 5,10.583333333333,566,21,0.10387785038352,0.029432385802269,12226.163316011
  7 | 6,-5.5,568,18,0.10838790327311,0.031264565363526,15295.448102951
  8 | 7,-7.9375,542,16,0.11674013504386,0.029484617933631,18352.79207778
  9 | 8,12.166666666667,599,24,0.16122512874007,0.052343306601048,21446.514513969
 10 | 9,10.806603773585,1267,53,0.2247808945179,0.037818958848715,24605.232800007
 11 | 10,15.053797468354,1720,79,0.24170792597532,0.044391799166799,27684.980213881
 12 | 11,-1.5326086956522,1590,46,0.24667657256126,0.050101535048336,30776.000858068
 13 | 12,-9.7083333333333,1478,48,0.22868193393946,0.053942181050777,33859.155449152
 14 | 13,-4.7727272727273,1436,44,0.39942449390888,0.063749760290608,36948.059296131
 15 | 14,-11.257575757576,1106,33,0.34375342148542,0.059864736787975,40032.171612024
 16 | 15,7.202380952381,1264,42,0.32036609226465,0.069277341924608,43118.681558847
 17 | 16,-13.28125,775,24,0.29566724926233,0.063349225189537,46197.414586782
 18 | 17,0.94166666666667,1037,30,0.41377734726667,0.071681627377868,49284.942941904
 19 | 18,5.75,748,24,0.33886451756954,0.068497176675126,52369.253451109
 20 | 19,-8.71875,842,24,0.45781510215998,0.058275318542495,55457.752614021
 21 | 20,-5.8676470588235,672,17,0.53462120777369,0.057810675021261,58542.084111214
 22 | 21,-6.8421052631579,716,19,0.60764492100477,0.064270095337182,61634.21347928
 23 | 22,-5.8571428571429,732,21,0.64261210149527,0.068129319958389,64716.454583406
 24 | 23,-1.7738095238095,787,21,0.61844365131855,0.057609518219717,67804.824807405
 25 | 24,-3.1071428571429,556,14,0.63900305151939,0.056879258744419,70885.788652182
 26 | 25,1.1166666666667,534,15,0.6976574755311,0.049259904682636,73973.405339479
 27 | 26,-1.5192307692308,468,13,0.64544979292154,0.04733891300112,77058.188096285
 28 | 27,-3.75,554,14,0.6684123942256,0.04495135641098,80147.682602406
 29 | 28,-2.0769230769231,507,13,0.66438281059265,0.047691881015897,83233.454415321
 30 | 29,-5.9464285714286,519,14,0.6938461330533,0.047677454277873,86321.545918226
 31 | 30,-5.625,540,14,0.62893603372574,0.048527343899012,89405.741168261
 32 | 31,-5.8269230769231,468,13,0.6258493257165,0.042918682560325,92495.569252253
 33 | 32,-5.96875,603,16,0.60775027537346,0.041709499500692,95580.412363291
 34 | 33,-0.8125,437,12,0.56024558949471,0.043020392000675,98671.039297342
 35 | 34,-6.6666666666667,581,15,0.57606478148699,0.037179504457861,101756.21317363
 36 | 35,-6.2291666666667,432,12,0.54789750576019,0.039359708514065,104845.51876974
 37 | 36,-8.75,453,12,0.4921031267643,0.042268126763403,107931.81754065
 38 | 37,-4.0166666666667,590,15,0.48646099513769,0.042580844625831,111023.61872482
 39 | 38,-8,516,15,0.49522505527735,0.042653404973447,114106.57322884
 40 | 39,-1.3846153846154,507,13,0.48793794256449,0.04224406632781,117195.03930712
 41 | 40,-5.4318181818182,423,11,0.48889998710155,0.040071641802788,120281.06809187
 42 | 41,-4.9230769230769,505,13,0.48055751681328,0.039680305801332,123372.07204795
 43 | 42,-6.0208333333333,482,12,0.45747923773527,0.042458710625768,126458.34766293
 44 | 43,-1.9,555,15,0.45183955562115,0.042399740766734,129550.40173292
 45 | 44,-7.5,479,13,0.47835363066196,0.042559848606586,132637.18359709
 46 | 45,-6.6875,455,12,0.44431625276804,0.040284845102578,135729.21975207
 47 | 46,-8.9807692307692,486,13,0.45677413457632,0.038720687437803,138815.07875204
 48 | 47,-4.825,396,10,0.45799841880798,0.036913332745433,141908.51582503
 49 | 48,-7.5681818181818,393,11,0.43723845565319,0.042279472753406,144993.19221425
 50 | 49,-1.5,559,15,0.44824945348501,0.039370908103883,148082.60764003
 51 | 50,-6.2083333333333,456,12,0.45087397807837,0.03716516715847,151168.20122814
 52 | 51,-9.8,534,15,0.43702257186174,0.035734470188618,154257.148736
 53 | 52,-8,492,13,0.44362791693211,0.03762533941865,157344.15754008
 54 | 53,-5.25,459,12,0.45500042200089,0.036687616363168,160434.63341284
 55 | 54,-5.3409090909091,413,11,0.4488413001895,0.034906475421041,163523.18911505
 56 | 55,-6.4318181818182,436,11,0.45025093793869,0.036342834193259,166613.51492286
 57 | 56,-8.0384615384615,461,13,0.46570703411102,0.03995348854363,169697.67845893
 58 | 57,-7.6346153846154,501,13,0.43338271135092,0.039611381992698,172786.25870085
 59 | 58,-7.7916666666667,437,12,0.44928603261709,0.040222072809935,175872.73005581
 60 | 59,-5.9583333333333,463,12,0.46624615210295,0.040560769751668,178964.54942298
 61 | 60,-9.9090909090909,413,11,0.4442690333128,0.038295251406729,182062.75369263
 62 | 61,-3.8125,428,12,0.43601207023859,0.035158951945603,185158.80821776
 63 | 62,-7.6923076923077,475,13,0.40569348537922,0.035799814514816,188250.66081071
 64 | 63,-6.6363636363636,431,11,0.42681465303898,0.042572220876813,191351.09989786
 65 | 64,-5,400,11,0.41745218658447,0.0398869420439,194434.95675683
 66 | 65,-6,402,10,0.42086963629723,0.0396958931759,197527.92675591
 67 | 66,-8.425,387,10,0.42330500152707,0.038195345409214,200615.96345854
 68 | 67,-4.7727272727273,421,11,0.40715691006184,0.036385320782661,203720.71026158
 69 | 68,-8.5833333333333,443,12,0.40344660624862,0.039129270635545,206804.60639071
 70 | 69,-8.7,373,10,0.40128754279017,0.036490554556251,209908.93703961
 71 | 70,-8.6666666666667,443,12,0.40851074105501,0.038445966929197,213017.6966126
 72 | 71,-2.7272727272727,413,11,0.41067783224583,0.0347543014884,216075.20573568
 73 | 72,-4.7708333333333,441,12,0.40127306774259,0.036854844558984,219183.13803959
 74 | 73,-5.8958333333333,453,12,0.40975332427025,0.036320907175541,222279.96047664
 75 | 74,-9.4090909090909,400,11,0.39718827790022,0.035727003157139,225375.15562463
 76 | 75,-12.6875,408,12,0.39383558106422,0.036454273343086,228467.32659149
 77 | 76,-3.9772727272727,409,11,0.40533371210098,0.037318908035755,231551.33051062
 78 | 77,-4.6136363636364,429,11,0.39889466708899,0.036922183424234,234666.38622546
 79 | 78,-8.8461538461538,475,13,0.39915732344985,0.036689936708659,237766.57640219
 80 | 79,-7,445,12,0.38725353875756,0.032528960898519,240870.14482617
 81 | 80,-8.75,426,11,0.39519296753407,0.034220179125667,243970.74544001
 82 | 81,-5.2045454545455,388,11,0.39592769497633,0.03491831561923,247072.61222601
 83 | 82,-7.1818181818182,376,11,0.40324878278375,0.035286599181592,250179.29944086
 84 | 83,-9.875,379,10,0.38703223651648,0.032568644538522,253264.30240369
 85 | 84,-5.9545454545455,416,11,0.40559637379646,0.037186695754528,256350.25635958
 86 | 85,-6,449,11,0.40885295468569,0.035535577245057,259451.20921779
 87 | 86,-9.2083333333333,428,12,0.40436582121253,0.033965634226799,262556.47954965
 88 | 87,-5.9772727272727,383,11,0.41843406271935,0.033925799757242,265645.21741557
 89 | 88,-5.6363636363636,432,11,0.40849586594105,0.038514320969582,268736.37077069
 90 | 89,-6.9545454545455,416,11,0.40571232023835,0.036798851117492,271826.71463871
 91 | 90,-9.15,361,10,0.39028529846668,0.037728272676468,274916.96188283
 92 | 91,-6.125,430,12,0.40744237133861,0.037438799366355,278015.84529686
 93 | 92,-10.159090909091,400,11,0.39765123099089,0.037313489928842,281105.74615073
 94 | 93,-4.1,385,10,0.41264896920323,0.037302253067493,284197.1706388
 95 | 94,-8.6111111111111,362,9,0.41678735074401,0.034228690594435,287305.07751155
 96 | 95,-10.361111111111,322,9,0.42678021454811,0.033199661089806,290404.15810561
 97 | 96,-5.6666666666667,355,9,0.40362212339044,0.038675586894155,293492.53668165
 98 | 97,-8.1818181818182,404,11,0.40906615021825,0.033120844051242,296592.94970655
 99 | 98,-5.225,380,10,0.41440004438162,0.036935546435416,299686.30462861
100 | 99,-5.875,374,10,0.41312079289556,0.036710994064808,302787.99873877
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player075p_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2Player075p_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player075p_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2Player075p_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_time.png


--------------------------------------------------------------------------------
/plots/Pong2Player0_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2Player0_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player0_history_A.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-11.227272727273,251,22,-0.025585235670209,0.025499564410187,0
  3 | 2,-3.08,77,25,-0.10267928412557,0.026237351149321,2138.1417090893
  4 | 3,-21,378,18,-0.1350394269228,0.018906522810459,5032.016119957
  5 | 4,-16.333333333333,588,36,-0.17244348245859,0.027499322533607,7944.8819160461
  6 | 5,-17.4,609,35,-0.17134431993961,0.03071308735013,10893.190360785
  7 | 6,-18,630,35,-0.18266862034798,0.030370710909367,13839.044056892
  8 | 7,-10.333333333333,217,21,-0.17723900139332,0.018381405025721,16811.021166086
  9 | 8,-11.6,232,20,-0.17556799054146,0.022607351928949,19795.227915049
 10 | 9,-12.03125,386,32,-0.21307243537903,0.027679172545671,22806.841446161
 11 | 10,-13.51724137931,404,29,-0.23035820114613,0.02044185616076,25822.181778193
 12 | 11,-6.875,165,24,-0.26865710783005,0.028466097682714,28847.795817852
 13 | 12,-18.125,290,16,-0.26660308158398,0.023737326353788,31865.068264961
 14 | 13,-11.966666666667,362,30,-0.32671620607376,0.025554981783032,34888.49136591
 15 | 14,-8.45,171,20,-0.35948745524883,0.027599836587906,37907.987119913
 16 | 15,-19,417,21,-0.39233250629902,0.02771189391613,40934.394598007
 17 | 16,-16.789473684211,332,19,-0.35827468526363,0.024274397015572,43948.930922031
 18 | 17,-14.055555555556,269,18,-0.40537124884129,0.02935955581069,46971.943500996
 19 | 18,-18.157894736842,346,19,-0.38306200611591,0.028565983921289,49988.333903074
 20 | 19,-21,527,25,-0.37758827137947,0.02897119282186,53014.636921883
 21 | 20,-21,510,24,-0.40028989064693,0.031559363260865,56030.686516047
 22 | 21,-21,382,18,-0.42341521680355,0.030187362790108,59056.352437973
 23 | 22,-21,688,32,-0.48741152131557,0.032671946972609,62074.302101851
 24 | 23,-14.928571428571,420,28,-0.44714593672752,0.034300659775734,65102.738009691
 25 | 24,-21,420,20,-0.41901146733761,0.032433551505208,68122.853546858
 26 | 25,-16.192307692308,421,26,-0.39868759739399,0.030124093748629,71147.998114824
 27 | 26,-18.764705882353,324,17,-0.37142059743404,0.030927883055061,74168.781708956
 28 | 27,-20.809523809524,450,21,-0.36811852920055,0.032222523305565,77202.353144884
 29 | 28,-19.642857142857,284,14,-0.3046877477169,0.030882145626005,80223.19860363
 30 | 29,-16.3125,262,16,-0.23653232705593,0.0310577664515,83246.392048597
 31 | 30,-14.153846153846,185,13,-0.22916741073132,0.028503048017621,86263.41059041
 32 | 31,-20,263,13,-0.1838299318552,0.028571169128641,89288.379914522
 33 | 32,-15.083333333333,194,12,-0.18447684276104,0.028669490520842,92305.957981825
 34 | 33,-20.9,221,10,-0.12901995027065,0.028296903408365,95331.062992573
 35 | 34,-19.090909090909,217,11,-0.12079485702515,0.029319755812641,98348.517512321
 36 | 35,-19.071428571429,272,14,-0.10410001826286,0.035293330463581,101371.28614831
 37 | 36,-19.230769230769,269,13,-0.066544069170952,0.026355039540213,104388.2614243
 38 | 37,-20.363636363636,226,11,-0.041784022212029,0.027937983275391,107410.07730746
 39 | 38,-19,233,12,-0.046414270281792,0.024983407229651,110425.35021234
 40 | 39,-20.1,214,10,-0.060125853300095,0.02581845295243,113446.5987165
 41 | 40,-20.7,219,10,-0.012591427206993,0.028257514157332,116464.33995342
 42 | 41,-20.2,212,10,-0.0086673645973206,0.026001896804199,119487.04254436
 43 | 42,-20.9,218,10,-0.022646985054016,0.030275962976739,122504.26678848
 44 | 43,-20.1,204,10,-0.027892006874084,0.028721425947733,125523.69126534
 45 | 44,-18.5,190,10,-0.019060639977455,0.028338955501793,128542.41256523
 46 | 45,-20.222222222222,197,9,-0.062995014309883,0.028499234545976,131564.51712227
 47 | 46,-20.9,229,10,-0.04321956217289,0.027253655160312,134585.30974007
 48 | 47,-19.222222222222,181,9,-0.04104344856739,0.029550002928823,137609.52091312
 49 | 48,-17.777777777778,176,9,-0.042253398656845,0.029484741275664,140630.72361016
 50 | 49,-18.333333333333,181,9,-0.044190877795219,0.030928210292011,143658.25809813
 51 | 50,-20.5,219,10,-0.032392913341522,0.027665570050478,146677.15844607
 52 | 51,-21,215,10,-0.025153607726097,0.030666341849719,149703.02046299
 53 | 52,-21,198,9,-0.018558032989502,0.029278066476807,152725.13730097
 54 | 53,-20.888888888889,196,9,-0.0093344089984894,0.029708028633497,155749.31631184
 55 | 54,-20.2,211,10,0.0019440084695816,0.031671682446729,158769.69563389
 56 | 55,-19.333333333333,179,9,0.018299667239189,0.026600243350491,161795.82533765
 57 | 56,-21,232,11,0.0043197833299637,0.028242128532613,164816.60238647
 58 | 57,-21,206,9,0.042310146689415,0.026893466249108,167839.94631767
 59 | 58,-17.777777777778,169,9,0.04972175359726,0.027099281826988,170858.79717588
 60 | 59,-17.444444444444,172,9,0.0208746291399,0.030874344742391,173883.27386665
 61 | 60,-18.666666666667,173,9,0.029407291412354,0.029012460903265,176902.7581079
 62 | 61,-20.666666666667,201,9,0.0078649291992188,0.028558519284707,179925.50136185
 63 | 62,-20.222222222222,187,9,-0.0084951033592224,0.027179981692228,182941.50200248
 64 | 63,-14,163,11,-0.031634729266167,0.026187739172485,185963.90782166
 65 | 64,-19.111111111111,178,9,-0.037065606832504,0.03089484960027,188982.32703543
 66 | 65,-20.222222222222,185,9,-0.045297539710999,0.031736915443093,192004.94367456
 67 | 66,-20.5,182,8,-0.021077255725861,0.027121751084924,195026.34629083
 68 | 67,-20,196,9,-0.01648589348793,0.026432656347286,198049.36023474
 69 | 68,-19.555555555556,176,9,-0.0049410942792892,0.026054322204611,201068.14448476
 70 | 69,-21,179,8,0.020968446135521,0.026585901013575,204092.56690407
 71 | 70,-19.777777777778,179,9,0.040612026810646,0.028025028575212,207112.07806778
 72 | 71,-17.875,154,8,0.040276304125786,0.02809495675005,210137.16612267
 73 | 72,-20.888888888889,194,9,0.016871741056442,0.028380218307022,213155.54822254
 74 | 73,-19.5,160,8,0.036802646875381,0.025027464595158,216179.3938725
 75 | 74,-16.75,150,8,0.045555381655693,0.02841033472959,219198.43669176
 76 | 75,-16.875,150,8,0.054811473846436,0.029059150262736,222221.3499825
 77 | 76,-18.75,164,8,0.053368012666702,0.026548398239538,225238.91632152
 78 | 77,-21,234,11,0.045312218904495,0.027458579693921,228261.53490162
 79 | 78,-21,214,10,0.049028945684433,0.024622122777626,231281.81836367
 80 | 79,-17.625,159,8,0.054034627914429,0.025933243244886,234308.13716269
 81 | 80,-20.875,187,8,0.066353542208672,0.02484296876844,237327.41376281
 82 | 81,-21,194,9,0.069789034128189,0.029652824990451,240351.45815897
 83 | 82,-18.375,159,8,0.07883975148201,0.026677033578046,243371.4479301
 84 | 83,-21,181,8,0.081944783449173,0.024853519943543,246393.29405212
 85 | 84,-19.625,169,8,0.087315746188164,0.029320955432951,249412.52044725
 86 | 85,-21,173,8,0.077867820382118,0.026834661031142,252438.86284018
 87 | 86,-17.75,159,8,0.080595542669296,0.025142893266864,255460.55970812
 88 | 87,-19.375,163,8,0.070458922982216,0.026407181705348,258488.0415895
 89 | 88,-19.25,163,8,0.0803895226717,0.023286892633187,261507.89537716
 90 | 89,-19.25,159,8,0.078255670189857,0.023623998247087,264530.37380409
 91 | 90,-20.375,174,8,0.076555946469307,0.025433748307638,267552.72768211
 92 | 91,-19.875,161,8,0.076039079427719,0.024356592353433,270579.07843208
 93 | 92,-21,190,9,0.073541507840157,0.021778888463974,273597.35354686
 94 | 93,-19.75,166,8,0.060794572353363,0.025047093832865,276620.24881077
 95 | 94,-21,180,8,0.051815708041191,0.023602273864672,279639.6330328
 96 | 95,-21,192,9,0.050514903306961,0.025672010847833,282665.09332156
 97 | 96,-21,182,8,0.046867358803749,0.023934892632999,285687.98782468
 98 | 97,-17.444444444444,159,9,0.058180457472801,0.022700749365613,288715.86644173
 99 | 98,-21,205,9,0.056869177818298,0.023170761575224,291737.61343169
100 | 99,-16.125,135,8,0.053170693874359,0.020797000256367,294764.53351068
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player0_history_B.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-15.636363636364,347,22,-0.035547246217728,0.016063386268914,0
  3 | 2,-20.2,508,25,-0.093115000188351,0.020183824717999,2138.1417090893
  4 | 3,-10.111111111111,185,18,-0.13151980644464,0.016994844824076,5032.016119957
  5 | 4,-10.527777777778,388,36,-0.15854629296064,0.019270747810602,7944.8819160461
  6 | 5,-9.5428571428571,353,35,-0.17677662348747,0.024828659415245,10893.190360785
  7 | 6,-9.8857142857143,355,35,-0.19931477379799,0.01351283454895,13839.044056892
  8 | 7,-16.095238095238,353,21,-0.18255959832668,0.014386477082968,16811.021166086
  9 | 8,-16.3,346,20,-0.19562866699696,0.022980797350407,19795.227915049
 10 | 9,-12.9375,415,32,-0.35770421242714,0.027820185661316,22806.841446161
 11 | 10,-15.344827586207,448,29,-0.42874370098114,0.043996010184288,25822.181778193
 12 | 11,-20.75,506,24,-0.50003332948685,0.040714545398951,28847.795817852
 13 | 12,-15.125,243,16,-0.46861186861992,0.036812288284302,31865.068264961
 14 | 13,-13.366666666667,403,30,-0.46336123096943,0.035849552631378,34888.49136591
 15 | 14,-20.5,423,20,-0.46318495810032,0.038222200334072,37907.987119913
 16 | 15,-6.6190476190476,140,21,-0.45802145218849,0.035477287948132,40934.394598007
 17 | 16,-16.105263157895,319,19,-0.45680632591248,0.053284872546792,43948.930922031
 18 | 17,-15.5,284,18,-0.48365955197811,0.042265393778682,46971.943500996
 19 | 18,-8.9473684210526,170,19,-0.53705098724365,0.044493979424238,49988.333903074
 20 | 19,-1.8,46,25,-0.56766551327705,0.040447373002768,53014.636921883
 21 | 20,-4.4583333333333,113,24,-0.49396479249001,0.034765257060528,56030.686516047
 22 | 21,-6.1111111111111,113,18,-0.50199485063553,0.039501217216253,59056.352437973
 23 | 22,-6.21875,202,32,-0.487187063694,0.039658673390746,62074.302101851
 24 | 23,-18.035714285714,512,28,-0.47721617519855,0.038565734222531,65102.738009691
 25 | 24,-4.6,92,20,-0.52219408416748,0.038196368992329,68122.853546858
 26 | 25,-19.961538461538,520,26,-0.44699176561832,0.038303328856826,71147.998114824
 27 | 26,-15.941176470588,272,17,-0.41673709726334,0.037333195909858,74168.781708956
 28 | 27,-5.1428571428571,111,21,-0.3699722969532,0.038875097535551,77202.353144884
 29 | 28,-13.571428571429,193,14,-0.34000007510185,0.040263681471348,80223.19860363
 30 | 29,-20.1875,324,16,-0.31610184490681,0.039434242356569,83246.392048597
 31 | 30,-20.076923076923,262,13,-0.30181977343559,0.029510153640062,86263.41059041
 32 | 31,-16.769230769231,221,13,-0.26731790697575,0.032566703515127,89288.379914522
 33 | 32,-18.833333333333,234,12,-0.22635623526573,0.033302143184468,92305.957981825
 34 | 33,-15.5,172,10,-0.21410574710369,0.032989202816039,95331.062992573
 35 | 34,-14.636363636364,176,11,-0.23098263418674,0.029800812546164,98348.517512321
 36 | 35,-19,271,14,-0.20933587479591,0.045391301142052,101371.28614831
 37 | 36,-15.769230769231,216,13,-0.1732489168644,0.040104366856627,104388.2614243
 38 | 37,-15.727272727273,176,11,-0.14663448023796,0.03570713039767,107410.07730746
 39 | 38,-17.916666666667,218,12,-0.15233700692654,0.036363918918738,110425.35021234
 40 | 39,-15,158,10,-0.10284967255592,0.039888166427612,113446.5987165
 41 | 40,-15,156,10,-0.110485252738,0.036288297230378,116464.33995342
 42 | 41,-14.6,149,10,-0.08808405315876,0.033201564714778,119487.04254436
 43 | 42,-13.9,152,10,-0.07288631439209,0.038115674535511,122504.26678848
 44 | 43,-13.8,143,10,-0.055069948673248,0.032398316109553,125523.69126534
 45 | 44,-17.4,181,10,-0.033978573799133,0.033925841143355,128542.41256523
 46 | 45,-17.888888888889,175,9,-0.011699901461601,0.033647665683646,131564.51712227
 47 | 46,-10.9,120,10,-0.039500725746155,0.033995908844285,134585.30974007
 48 | 47,-17.222222222222,174,9,-0.012171776294708,0.036561916653533,137609.52091312
 49 | 48,-19.555555555556,186,9,-0.014062858104706,0.036956321249716,140630.72361016
 50 | 49,-17.333333333333,168,9,-0.011009687542915,0.030932576787192,143658.25809813
 51 | 50,-13.5,139,10,-0.029655468344688,0.03278627650626,146677.15844607
 52 | 51,-12.4,126,10,-0.021844506025314,0.034187985648401,149703.02046299
 53 | 52,-14.222222222222,141,9,-0.021329047083855,0.032817083856557,152725.13730097
 54 | 53,-15.444444444444,146,9,-0.010863201618195,0.031814879472833,155749.31631184
 55 | 54,-13.7,143,10,-0.0052515711784363,0.032435634775247,158769.69563389
 56 | 55,-16.888888888889,154,9,-0.023274070858955,0.030390426066704,161795.82533765
 57 | 56,-12.363636363636,140,11,-0.013775194406509,0.029583558951505,164816.60238647
 58 | 57,-14.111111111111,140,9,-0.010566335558891,0.03017777786497,167839.94631767
 59 | 58,-19,182,9,-0.040504007816315,0.03008227618644,170858.79717588
 60 | 59,-18.333333333333,178,9,-0.038168950676918,0.029901974986307,173883.27386665
 61 | 60,-18.111111111111,166,9,-0.035144717693329,0.030881551608909,176902.7581079
 62 | 61,-13.222222222222,134,9,-0.044505524277687,0.030492878863239,179925.50136185
 63 | 62,-15.666666666667,144,9,-0.031238757967949,0.031001482838998,182941.50200248
 64 | 63,-20.363636363636,234,11,-0.0081189591884613,0.031862079079729,185963.90782166
 65 | 64,-17.555555555556,162,9,0.010118610024452,0.03147687318176,188982.32703543
 66 | 65,-16.777777777778,156,9,0.0095146139860153,0.03700613715034,192004.94367456
 67 | 66,-15.25,139,8,0.034772622585297,0.027561287589371,195026.34629083
 68 | 67,-14.555555555556,144,9,0.047611666798592,0.032678590173833,198049.36023474
 69 | 68,-15.777777777778,143,9,0.069406152129173,0.032473844949622,201068.14448476
 70 | 69,-16.125,140,8,0.06597646343708,0.031676553032361,204092.56690407
 71 | 70,-15.888888888889,146,9,0.076632283806801,0.029510184790008,207112.07806778
 72 | 71,-18.75,163,8,0.076743006110191,0.02990186560154,210137.16612267
 73 | 72,-13.444444444444,124,9,0.066774213075638,0.028356832669582,213155.54822254
 74 | 73,-19,154,8,0.078227077245712,0.027551507115364,216179.3938725
 75 | 74,-20,170,8,0.08356684923172,0.03249790078029,219198.43669176
 76 | 75,-19.5,172,8,0.071725313782692,0.02946904342249,222221.3499825
 77 | 76,-18.875,165,8,0.069635803937912,0.02919847426936,225238.91632152
 78 | 77,-10.545454545455,116,11,0.058670830130577,0.025365659317002,228261.53490162
 79 | 78,-12.1,121,10,0.060952044487,0.029573503917083,231281.81836367
 80 | 79,-17.5,149,8,0.039388704061508,0.030934552330524,234308.13716269
 81 | 80,-15.75,139,8,0.032999743103981,0.027728069182485,237327.41376281
 82 | 81,-12.111111111111,117,9,0.02932597899437,0.02745916046761,240351.45815897
 83 | 82,-18.5,154,8,0.043820874333382,0.028538968775421,243371.4479301
 84 | 83,-15.5,136,8,0.026993710756302,0.027210989664309,246393.29405212
 85 | 84,-17.625,148,8,0.035026452422142,0.029123003091663,249412.52044725
 86 | 85,-16.75,138,8,0.058367486596107,0.029693385642022,252438.86284018
 87 | 86,-18.875,168,8,0.073346694946289,0.028949410717934,255460.55970812
 88 | 87,-17.625,144,8,0.070859031200409,0.028716616470367,258488.0415895
 89 | 88,-18.125,157,8,0.064134326934814,0.030109628035687,261507.89537716
 90 | 89,-18.75,151,8,0.076011339068413,0.031651783834212,264530.37380409
 91 | 90,-15.25,134,8,0.085877468347549,0.030369257452898,267552.72768211
 92 | 91,-19,157,8,0.091362419605255,0.026273697428405,270579.07843208
 93 | 92,-13.555555555556,122,9,0.10069872260094,0.026822981573641,273597.35354686
 94 | 93,-16.75,139,8,0.094791305541992,0.027533179562539,276620.24881077
 95 | 94,-14.875,124,8,0.11578515303135,0.026371674910188,279639.6330328
 96 | 95,-13.555555555556,122,9,0.10465205264091,0.025525284297764,282665.09332156
 97 | 96,-13.5,114,8,0.11002721095085,0.023612505104393,285687.98782468
 98 | 97,-16.444444444444,151,9,0.11250710332394,0.028119032643735,288715.86644173
 99 | 98,-10.333333333333,100,9,0.12482603573799,0.028869435520843,291737.61343169
100 | 99,-20.625,178,8,0.12047631049156,0.030346720989794,294764.53351068
101 | 


--------------------------------------------------------------------------------
/plots/Pong2Player0_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2Player0_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player0_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2Player0_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_time.png


--------------------------------------------------------------------------------
/plots/Pong2PlayerVS_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2PlayerVS_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2PlayerVS_history_A.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,19.894736842105,1652,76,0.036629230499268,0.034258731238544,0
  3 | 2,-10.864864864865,1032,37,0.088388940036297,0.036268266946077,2140.1843442917
  4 | 3,6.75,566,20,0.10980970552564,0.035165142327547,5024.0690410137
  5 | 4,17.782608695652,544,23,0.14155595132709,0.035153854876757,7954.2796509266
  6 | 5,-8.2571428571429,982,35,0.18404190149903,0.034203920930624,10895.232643127
  7 | 6,1.3636363636364,735,22,0.23041268283129,0.036833466291428,13843.955490112
  8 | 7,-4.0555555555556,607,18,0.24128054684401,0.040439512044191,16800.791962147
  9 | 8,-17.246376811594,1712,69,0.27102411770821,0.040627625931054,19781.40379405
 10 | 9,-8.2608695652174,1735,69,0.37685767302662,0.047047990996391,22781.630583048
 11 | 10,20.964705882353,1804,85,0.40086788585782,0.061366469353437,25832.945611
 12 | 11,18.073529411765,1622,68,0.58332307624817,0.069838082119823,28871.55139184
 13 | 12,-8.2558139534884,1330,43,0.60145196096599,0.066045082810801,31952.171814919
 14 | 13,-6.5526315789474,1282,38,0.68109701417387,0.074076765326783,34966.254708767
 15 | 14,-6,1087,32,0.7062286438942,0.081175927717239,37979.379287958
 16 | 15,-7.1935483870968,1058,31,0.73730385822058,0.080288166455925,41006.026421785
 17 | 16,2.8636363636364,843,22,0.76484009151161,0.06838713674061,44023.744074583
 18 | 17,-4.25,841,24,0.82907662519813,0.070006534771994,47071.221020699
 19 | 18,-1.6315789473684,732,19,0.82211218604445,0.079391503483057,50115.644058466
 20 | 19,-7,762,22,1.0358029997051,0.083106067851186,53143.724182606
 21 | 20,-7.4117647058824,599,17,1.0717535401285,0.068865682188421,56224.953065634
 22 | 21,-4.0526315789474,681,19,1.0258434101641,0.07186470163241,59252.510054588
 23 | 22,2.2631578947368,707,19,1.0864659250975,0.059363974899054,62302.120079756
 24 | 23,3.3571428571429,526,14,1.0833346217573,0.063327269699425,65340.640899897
 25 | 24,-0.47368421052632,750,19,1.0997896017432,0.062145422551781,68390.421947956
 26 | 25,-3.2941176470588,640,17,1.102532440275,0.06356831882894,71446.502653837
 27 | 26,-3,656,18,1.0374306339025,0.054802458614111,74549.215828657
 28 | 27,-4,514,13,0.96129315176606,0.054592645399272,77634.196890831
 29 | 28,-4,524,14,0.96601669877768,0.053654642223381,80685.836214781
 30 | 29,-2.1764705882353,617,17,0.9382788464427,0.053507280364633,83716.217218876
 31 | 30,-1.625,564,16,0.90140304681659,0.054804528713226,86816.62901473
 32 | 31,-2.7333333333333,590,15,0.89678108328581,0.055654351443052,89896.952369928
 33 | 32,-4.6666666666667,539,15,0.87170422816277,0.052837499111891,92944.94702816
 34 | 33,0.375,593,16,0.86125646364689,0.054151404440403,96033.474353313
 35 | 34,-5.4285714285714,517,14,0.80565326881409,0.048675433501601,99126.870426178
 36 | 35,-8.3333333333333,418,12,0.83305224263668,0.048897989697754,102218.8840003
 37 | 36,-7.2666666666667,544,15,0.81939549726248,0.062981894731522,105332.10956836
 38 | 37,-5.5333333333333,521,15,0.78063530129194,0.054293885916471,108390.310781
 39 | 38,-8.2857142857143,480,14,0.76713334679604,0.049812828674912,111577.00082707
 40 | 39,-8.2,525,15,0.74416504707932,0.047728421837091,114681.56327724
 41 | 40,-5.6666666666667,558,15,0.70014776280522,0.049224613063037,117785.27115202
 42 | 41,-4.1818181818182,400,11,0.66092579168081,0.047589437417686,120884.00687814
 43 | 42,-6.0833333333333,458,12,0.61858874151111,0.046044506199658,123987.18336725
 44 | 43,-1.0666666666667,569,15,0.61212630918622,0.042183957137167,127176.64506316
 45 | 44,-3,568,16,0.61761562865973,0.048996827743948,130457.61719894
 46 | 45,-1.3333333333333,447,12,0.62245470565557,0.043741934835911,133590.15817618
 47 | 46,-6.3571428571429,507,14,0.62223418441415,0.049592643141747,136745.98338032
 48 | 47,-0.78571428571429,538,14,0.60995001575351,0.040588559448719,140288.5203855
 49 | 48,-0.75,438,12,0.6117885850668,0.042088281303644,143582.21646833
 50 | 49,-9,569,17,0.58831976529956,0.051058675073087,146885.37541223
 51 | 50,-3.2857142857143,513,14,0.58009276488423,0.050145434975624,150514.73280025
 52 | 51,0.85714285714286,516,14,0.56708448454738,0.05352240806818,153905.42626715
 53 | 52,-0.66666666666667,355,9,0.603058814466,0.054049070090055,156998.08905911
 54 | 53,-3.9230769230769,483,13,0.6199501991272,0.051908938907087,160223.2752831
 55 | 54,-7.3333333333333,431,12,0.60428953760862,0.04895763245225,163514.40027618
 56 | 55,0.25,476,12,0.60181038728356,0.050085354879498,166998.89249229
 57 | 56,-5.3846153846154,458,13,0.62117442503572,0.046872297987342,170359.70756102
 58 | 57,-3.2,359,10,0.60641793078184,0.052600849196315,173823.41051316
 59 | 58,-4.8571428571429,492,14,0.62653725367784,0.057909801356494,177196.93280435
 60 | 59,-2.0833333333333,411,12,0.62770763164759,0.044757399730384,180707.99882913
 61 | 60,-6.4545454545455,418,11,0.62793077921867,0.053041671976447,184368.79880023
 62 | 61,-6.6923076923077,478,13,0.62323064267635,0.051051275312901,188232.23590922
 63 | 62,-7.25,437,12,0.64113255578279,0.04879417716153,191757.69521713
 64 | 63,-5.3,369,10,0.62657707825303,0.048134090598673,195594.75462699
 65 | 64,-5.6153846153846,468,13,0.64060921034217,0.045986624030396,199862.88124394
 66 | 65,-4.3636363636364,424,11,0.63786323845387,0.047828262917697,203532.96440005
 67 | 66,-6.2,376,10,0.64415379482508,0.04426491074264,207733.68452883
 68 | 67,-8.6153846153846,444,13,0.6445699121356,0.054958277672529,211655.73910689
 69 | 68,-0.61538461538462,488,13,0.64655533200502,0.038586034059525,215466.90428996
 70 | 69,-4,486,13,0.64888703554869,0.043914513036609,219559.02877498
 71 | 70,-0.33333333333333,447,12,0.64624559032917,0.047641892939806,223804.84991622
 72 | 71,-6.5384615384615,472,13,0.63566690003872,0.049302317127585,227838.63679528
 73 | 72,-4.6363636363636,417,11,0.63711099690199,0.051252304315567,232027.32367206
 74 | 73,-8.2307692307692,443,13,0.63391436266899,0.048665264971554,236188.94491291
 75 | 74,-3.3333333333333,450,12,0.62945883643627,0.048747919064015,241381.24751687
 76 | 75,-3.5384615384615,478,13,0.62911532127857,0.047036009728909,245670.82948804
 77 | 76,-5.0833333333333,442,12,0.61040000587702,0.04652617444098,250570.50234628
 78 | 77,-1.3636363636364,452,11,0.57874590176344,0.045145205818117,258795.74901628
 79 | 78,2.9090909090909,444,11,0.56367688274384,0.046146706014872,265376.65494132
 80 | 79,1,433,11,0.58433644586802,0.044890030667186,273031.80326462
 81 | 80,0.4,407,10,0.56603986006975,0.051472402080894,279453.38454461
 82 | 81,-1.9,396,10,0.56983534914255,0.042509867101908,287339.86959958
 83 | 82,-6.5,427,12,0.57795796376467,0.046896411240101,294809.03612781
 84 | 83,-6.0769230769231,445,13,0.59138572341204,0.043647857218981,301846.70236588
 85 | 84,-2,446,12,0.60097670978308,0.049626976758242,308882.32642794
 86 | 85,-1.2,396,10,0.61287046802044,0.043968385025859,315383.48060513
 87 | 86,-5.2727272727273,397,11,0.61317317944765,0.046134451977909,321902.52821803
 88 | 87,-2,449,11,0.62379645317793,0.049722394168377,328601.83020878
 89 | 88,-6.3333333333333,431,12,0.60928024560213,0.048750120550394,334467.63142991
 90 | 89,-1.6666666666667,452,12,0.61758359968662,0.044201732039452,339582.35990906
 91 | 90,-0.090909090909091,414,11,0.63299699753523,0.045960027575493,345550.17142677
 92 | 91,-1.2727272727273,424,11,0.62909919255972,0.044356597349048,351840.05597472
 93 | 92,-2.3333333333333,447,12,0.6411742810607,0.046878527492285,357930.80812883
 94 | 93,-1.2,403,10,0.63069819927216,0.043852157630026,364560.41527987
 95 | 94,-1.9,423,10,0.64790958750248,0.04368916927278,369735.55567384
 96 | 95,-0.33333333333333,438,12,0.65804475861788,0.0468343205899,373734.55882072
 97 | 96,2.5,374,10,0.65388202512264,0.041903344072402,377166.37988877
 98 | 97,0.75,444,12,0.66520742946863,0.044931938365102,381859.93837285
 99 | 98,-3.0909090909091,422,11,0.67006937599182,0.046003070726991,389255.18656874
100 | 99,-4.3636363636364,393,11,0.64541435182095,0.048469615295529,395686.37279058
101 | 


--------------------------------------------------------------------------------
/plots/Pong2PlayerVS_history_B.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-19.894736842105,1652,76,0.030753178421408,0.020417045775801,0
  3 | 2,10.864864864865,1032,37,0.055571975842118,0.020495216928422,2140.1843442917
  4 | 3,-6.75,566,20,0.078522597447038,0.018338140755892,5024.0690410137
  5 | 4,-17.782608695652,544,23,0.11758578260243,0.019854311227798,7954.2796509266
  6 | 5,8.2571428571429,982,35,0.14490511378646,0.019651415139437,10895.232643127
  7 | 6,-1.3636363636364,735,22,0.156849973768,0.018027543753386,13843.955490112
  8 | 7,4.0555555555556,607,18,0.15613037407398,0.03612357429415,16800.791962147
  9 | 8,17.246376811594,1712,69,0.16600373826176,0.040893536631949,19781.40379405
 10 | 9,8.2608695652174,1735,69,0.16087171534449,0.053060119055212,22781.630583048
 11 | 10,-20.964705882353,1804,85,0.21962278755754,0.079659719966352,25832.945611
 12 | 11,-18.073529411765,1622,68,0.32419759770483,0.085290238887072,28871.55139184
 13 | 12,8.2558139534884,1330,43,0.38745628522336,0.087929436858743,31952.171814919
 14 | 13,6.5526315789474,1282,38,0.45150299651921,0.09053659071913,34966.254708767
 15 | 14,6,1087,32,0.49048775991797,0.080990801326931,37979.379287958
 16 | 15,7.1935483870968,1058,31,0.54364813241363,0.075845399744809,41006.026421785
 17 | 16,-2.8636363636364,843,22,0.6258010738343,0.073953761775978,44023.744074583
 18 | 17,4.25,841,24,0.71101898576319,0.07438159080036,47071.221020699
 19 | 18,1.6315789473684,732,19,0.78495635423064,0.065564899936318,50115.644058466
 20 | 19,7,762,22,0.84685340037942,0.066515339031816,53143.724182606
 21 | 20,7.4117647058824,599,17,0.89461297640204,0.06911209545657,56224.953065634
 22 | 21,4.0526315789474,681,19,1.0150132971406,0.071963771987706,59252.510054588
 23 | 22,-2.2631578947368,707,19,0.97882900413871,0.061374731684104,62302.120079756
 24 | 23,-3.3571428571429,526,14,1.0373114794493,0.055309426490217,65340.640899897
 25 | 24,0.47368421052632,750,19,1.0033243787885,0.056636636704206,68390.421947956
 26 | 25,3.2941176470588,640,17,0.92811875632405,0.054018623730168,71446.502653837
 27 | 26,3,656,18,0.93651082640886,0.055023560766131,74549.215828657
 28 | 27,4,514,13,0.94254752320051,0.058531855151057,77634.196890831
 29 | 28,4,524,14,0.89031834989786,0.056271550633013,80685.836214781
 30 | 29,2.1764705882353,617,17,0.86999075606465,0.051859064750373,83716.217218876
 31 | 30,1.625,564,16,0.82614176213741,0.057562429867685,86816.62901473
 32 | 31,2.7333333333333,590,15,0.82187315291166,0.058495092704892,89896.952369928
 33 | 32,4.6666666666667,539,15,0.789477578789,0.0587079064399,92944.94702816
 34 | 33,-0.375,593,16,0.79569136279821,0.05894677489996,96033.474353313
 35 | 34,5.4285714285714,517,14,0.78301355272532,0.054899339735508,99126.870426178
 36 | 35,8.3333333333333,418,12,0.79044949617982,0.056301230795681,102218.8840003
 37 | 36,7.2666666666667,544,15,0.80112966805696,0.055791450798512,105332.10956836
 38 | 37,5.5333333333333,521,15,0.78027736347914,0.050847516030073,108390.310781
 39 | 38,8.2857142857143,480,14,0.8137402973175,0.048504514113069,111577.00082707
 40 | 39,8.2,525,15,0.79286936756968,0.052606489300728,114681.56327724
 41 | 40,5.6666666666667,558,15,0.78303875213861,0.046839555993676,117785.27115202
 42 | 41,4.1818181818182,400,11,0.81350393545628,0.055344005376101,120884.00687814
 43 | 42,6.0833333333333,458,12,0.82606654143333,0.048681836031377,123987.18336725
 44 | 43,1.0666666666667,569,15,0.79435331770778,0.057932917118073,127176.64506316
 45 | 44,3,568,16,0.77271989172697,0.051193475157022,130457.61719894
 46 | 45,1.3333333333333,447,12,0.77484880280495,0.045013084542006,133590.15817618
 47 | 46,6.3571428571429,507,14,0.78015144973993,0.047682569071651,136745.98338032
 48 | 47,0.78571428571429,538,14,0.77643180459738,0.049572788715363,140288.5203855
 49 | 48,0.75,438,12,0.79014997059107,0.044922678057104,143582.21646833
 50 | 49,9,569,17,0.80040570390224,0.049333242207766,146885.37541223
 51 | 50,3.2857142857143,513,14,0.77136175608635,0.047377686411142,150514.73280025
 52 | 51,-0.85714285714286,516,14,0.74489397323132,0.044408887729049,153905.42626715
 53 | 52,0.66666666666667,355,9,0.75983708006144,0.04565731857717,156998.08905911
 54 | 53,3.9230769230769,483,13,0.74724908590317,0.049826633743942,160223.2752831
 55 | 54,7.3333333333333,431,12,0.74608357340097,0.044874432623386,163514.40027618
 56 | 55,-0.25,476,12,0.72551223999262,0.050711758375168,166998.89249229
 57 | 56,5.3846153846154,458,13,0.75233506691456,0.046698903981596,170359.70756102
 58 | 57,3.2,359,10,0.73104023519158,0.04379932167381,173823.41051316
 59 | 58,4.8571428571429,492,14,0.71849496692419,0.042877251973376,177196.93280435
 60 | 59,2.0833333333333,411,12,0.72559829336405,0.043330619191751,180707.99882913
 61 | 60,6.4545454545455,418,11,0.70747312837839,0.043838432177901,184368.79880023
 62 | 61,6.6923076923077,478,13,0.70651694011688,0.046224248636514,188232.23590922
 63 | 62,7.25,437,12,0.68852741104364,0.049908312603831,191757.69521713
 64 | 63,5.3,369,10,0.66830239707232,0.042447510216385,195594.75462699
 65 | 64,5.6153846153846,468,13,0.6702712700367,0.040575239777565,199862.88124394
 66 | 65,4.3636363636364,424,11,0.66358332365751,0.045900904551148,203532.96440005
 67 | 66,6.2,376,10,0.67817037689686,0.040241999074817,207733.68452883
 68 | 67,8.6153846153846,444,13,0.69724605107307,0.047058273538947,211655.73910689
 69 | 68,0.61538461538462,488,13,0.6848148176074,0.042222519740462,215466.90428996
 70 | 69,4,486,13,0.6842090305686,0.046335531935096,219559.02877498
 71 | 70,0.33333333333333,447,12,0.69121128064394,0.04058175695315,223804.84991622
 72 | 71,6.5384615384615,472,13,0.69477743583918,0.045939372435212,227838.63679528
 73 | 72,4.6363636363636,417,11,0.67862904149294,0.040864990450442,232027.32367206
 74 | 73,8.2307692307692,443,13,0.67067655992508,0.044045647501945,236188.94491291
 75 | 74,3.3333333333333,450,12,0.67136865311861,0.043262280382216,241381.24751687
 76 | 75,3.5384615384615,478,13,0.65915973466635,0.044760326504707,245670.82948804
 77 | 76,5.0833333333333,442,12,0.67250486266613,0.046776309124194,250570.50234628
 78 | 77,1.3636363636364,452,11,0.66918101072311,0.049758086398244,258795.74901628
 79 | 78,-2.9090909090909,444,11,0.68012343931198,0.042351133339107,265376.65494132
 80 | 79,-1,433,11,0.70117538625002,0.044687234297395,273031.80326462
 81 | 80,-0.4,407,10,0.71243719160557,0.043381349245086,279453.38454461
 82 | 81,1.9,396,10,0.7182938054204,0.042881715647876,287339.86959958
 83 | 82,6.5,427,12,0.7005225251317,0.045902971506119,294809.03612781
 84 | 83,6.0769230769231,445,13,0.69715527999401,0.046531330704689,301846.70236588
 85 | 84,2,446,12,0.69628106099367,0.046715326815844,308882.32642794
 86 | 85,1.2,396,10,0.69748233801126,0.040650709867477,315383.48060513
 87 | 86,5.2727272727273,397,11,0.70037325745821,0.040244948428124,321902.52821803
 88 | 87,2,449,11,0.70343012046814,0.045980382755399,328601.83020878
 89 | 88,6.3333333333333,431,12,0.69383743393421,0.041914516195655,334467.63142991
 90 | 89,1.6666666666667,452,12,0.69825469052792,0.039878826737404,339582.35990906
 91 | 90,0.090909090909091,414,11,0.68960511600971,0.040170164361596,345550.17142677
 92 | 91,1.2727272727273,424,11,0.67836646288633,0.042912368401885,351840.05597472
 93 | 92,2.3333333333333,447,12,0.68362768256664,0.043504333540797,357930.80812883
 94 | 93,1.2,403,10,0.67348195332289,0.04388574296236,364560.41527987
 95 | 94,1.9,423,10,0.6916718544364,0.043883678305894,369735.55567384
 96 | 95,0.33333333333333,438,12,0.70352491497993,0.04276246599108,373734.55882072
 97 | 96,-2.5,374,10,0.72184679532051,0.041803027048707,377166.37988877
 98 | 97,-0.75,444,12,0.7270547606945,0.045335452474654,381859.93837285
 99 | 98,3.0909090909091,422,11,0.72387615138292,0.043761293586344,389255.18656874
100 | 99,4.3636363636364,393,11,0.72118641269207,0.042639298856258,395686.37279058
101 | 


--------------------------------------------------------------------------------
/plots/Pong2PlayerVS_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2PlayerVS_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2PlayerVS_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2PlayerVS_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_time.png


--------------------------------------------------------------------------------
/plots/Pong2Player_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_average_reward.png


--------------------------------------------------------------------------------
/plots/Pong2Player_episode_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_episode_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player_history_A.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-26.458333333333,654,24,-0.095357276380062,0.043428983919322,0
  3 | 2,-24.260869565217,571,23,-0.25713111811876,0.049565929591656,2219.7107670307
  4 | 3,-30.823529411765,535,17,-0.37960375964642,0.061796604514122,5234.8550970554
  5 | 4,-29.375,719,24,-0.45105549693108,0.053154966473579,8274.6327157021
  6 | 5,-27.578947368421,542,19,-0.51505306136608,0.072533971011639,11343.717128754
  7 | 6,-27.289473684211,1038,38,-0.61315587735176,0.061384726524353,14424.585474968
  8 | 7,-28.514285714286,1008,35,-0.8955296381712,0.055836423873901,17527.729490757
  9 | 8,-22.910714285714,1294,56,-0.98565722322464,0.049536979198456,20646.920996666
 10 | 9,-26.333333333333,573,21,-1.02711254704,0.050674617409706,23793.418364763
 11 | 10,-21.958333333333,537,24,-1.0271174045801,0.049659738302231,26948.615488768
 12 | 11,-28.705882352941,508,17,-1.0852300107479,0.045676375865936,30113.221090794
 13 | 12,-31.333333333333,98,3,-1.0055357445478,0.045826857686043,33269.140145779
 14 | 13,-30.9375,505,16,-0.96134313106537,0.044203288912773,36434.461924553
 15 | 14,-28.705882352941,507,17,-0.91738075852394,0.046511339187622,39919.898360729
 16 | 15,-28.705882352941,507,17,-0.87516816967726,0.045471202611923,43082.808995724
 17 | 16,-36.818181818182,419,11,-0.85296364432573,0.050174029350281,46241.313314676
 18 | 17,-37,394,10,-0.81926957434416,0.042408460795879,49401.823321819
 19 | 18,-36.454545454545,417,11,-0.80404985666275,0.052845206677914,52556.488364697
 20 | 19,-36.181818181818,403,11,-0.72174312716722,0.045161524236202,55721.547684669
 21 | 20,-34.727272727273,391,11,-0.75188615107536,0.045518432587385,58879.465076685
 22 | 21,-36.666666666667,366,9,-0.71129180133343,0.045481154322624,62042.817330837
 23 | 22,-30.636363636364,359,11,-0.69675636130571,0.042603764116764,65203.508159876
 24 | 23,-34,365,10,-0.66037705516815,0.045244661659002,68371.994807005
 25 | 24,-37.888888888889,351,9,-0.6214785618186,0.041710187494755,71532.411481619
 26 | 25,-34.555555555556,334,9,-0.57756735938787,0.038945589452982,74696.672069788
 27 | 26,-35.666666666667,341,9,-0.54762472438812,0.041154150933027,77855.706784964
 28 | 27,-33.777777777778,331,9,-0.52157988876104,0.040725954800844,81024.021785021
 29 | 28,-37.125,308,8,-0.50267528045177,0.039348959892988,84182.163150072
 30 | 29,-32.818181818182,388,11,-0.5208072988987,0.04023371258378,87344.461227179
 31 | 30,-34,317,9,-0.50427635723352,0.039969101399183,90501.760588169
 32 | 31,-35.111111111111,334,9,-0.46341560208797,0.039467708855867,93669.918260336
 33 | 32,-33.7,345,10,-0.44287770795822,0.040555094107985,96827.402169228
 34 | 33,-37.875,320,8,-0.39677744108438,0.040005072303116,99991.178019285
 35 | 34,-34.75,300,8,-0.39570564180613,0.035729702591896,103149.88645959
 36 | 35,-35.142857142857,273,7,-0.409107203722,0.039551196306944,106317.52060747
 37 | 36,-35,316,9,-0.40082246625423,0.03927739597857,109477.61490846
 38 | 37,-31.444444444444,304,9,-0.3964589330554,0.038392450235784,112644.04946756
 39 | 38,-35,325,9,-0.35121801757812,0.038004578579217,115803.35801244
 40 | 39,-34,308,9,-0.32286202985048,0.037074327471666,118968.0033195
 41 | 40,-36.625,309,8,-0.32320038503408,0.037060650097206,122125.33657956
 42 | 41,-38.142857142857,292,7,-0.32281405234337,0.036791522218846,125288.71097755
 43 | 42,-34.25,295,8,-0.321888240695,0.038397595276125,128449.8854835
 44 | 43,-36.428571428571,284,7,-0.33277552944422,0.035203957707388,131617.50346541
 45 | 44,-36.875,297,8,-0.35373691809177,0.036649934265763,134780.73201251
 46 | 45,-34.875,289,8,-0.36005285412073,0.039131956782192,137949.28987861
 47 | 46,-33.75,277,8,-0.35520882689953,0.035049023443833,141109.40660763
 48 | 47,-35.25,289,8,-0.33053499352932,0.036487454757094,144275.81889558
 49 | 48,-35.25,307,8,-0.33954701691866,0.037912156413309,147435.31064677
 50 | 49,-39.571428571429,282,7,-0.32449682414532,0.039220210487023,150603.13879275
 51 | 50,-36,273,7,-0.31643177467585,0.037731511502527,153765.27083468
 52 | 51,-33.714285714286,267,7,-0.30931244218349,0.038934745969018,156934.99299765
 53 | 52,-36.428571428571,279,7,-0.28822984421253,0.040141627021134,160095.28643036
 54 | 53,-34.875,289,8,-0.29541040021181,0.035895705933799,163261.62074041
 55 | 54,-35.75,288,8,-0.27794497185946,0.040368533103727,166420.72678161
 56 | 55,-36.142857142857,276,7,-0.27815217393637,0.037139108544681,169588.44667172
 57 | 56,-36.714285714286,286,7,-0.25671535021067,0.039222533322871,172748.50242591
 58 | 57,-37.285714285714,285,7,-0.25984488493204,0.037240513943136,175910.684515
 59 | 58,-35.428571428571,270,7,-0.26506305646896,0.037857254445553,179070.61611986
 60 | 59,-36.285714285714,279,7,-0.26481886297464,0.038876299795229,182233.66440177
 61 | 60,-34.25,298,8,-0.27339893108606,0.03648377366364,185392.0522368
 62 | 61,-38,279,7,-0.26885661417246,0.036164035882335,188557.70815563
 63 | 62,-36.142857142857,286,7,-0.27515106755495,0.035154181305785,191718.52969551
 64 | 63,-33.875,295,8,-0.29238852584362,0.038873594135512,194886.45836949
 65 | 64,-31,261,8,-0.27444543755054,0.03615041842591,198048.08661747
 66 | 65,-37.857142857143,280,7,-0.27285833537579,0.037310198902152,201217.75610328
 67 | 66,-37.714285714286,284,7,-0.27229812979698,0.037948209465947,204377.43602824
 68 | 67,-36.714285714286,273,7,-0.26851659548283,0.037649981425377,207544.57766247
 69 | 68,-36.428571428571,287,7,-0.27455387759209,0.036888742106967,210706.8551352
 70 | 69,-36,273,7,-0.30327244818211,0.036359902586089,213871.46873212
 71 | 70,-35.5,244,6,-0.28581845986843,0.038536409900407,217029.25653219
 72 | 71,-36.428571428571,266,7,-0.28054606413841,0.03837350646453,220194.61400127
 73 | 72,-35,275,7,-0.30645019698143,0.037592195025645,223354.55645609
 74 | 73,-32.5,275,8,-0.30246568739414,0.03762510857312,226520.75250101
 75 | 74,-36.285714285714,265,7,-0.29572137546539,0.036622938405257,229680.09711981
 76 | 75,-32.125,291,8,-0.27788608777523,0.043533025629818,232846.38811898
 77 | 76,-33.571428571429,266,7,-0.25835147070885,0.03797403547354,236005.74047184
 78 | 77,-35.666666666667,239,6,-0.25210114300251,0.037980964697897,239172.54759908
 79 | 78,-39.333333333333,269,6,-0.24693970394135,0.039258848479018,242333.03757191
 80 | 79,-32.375,276,8,-0.24059923732281,0.040176058832556,245498.53613424
 81 | 80,-36.428571428571,270,7,-0.24444462656975,0.040253090277314,248660.04212308
 82 | 81,-35.571428571429,267,7,-0.25956621682644,0.038885270408355,251829.45792484
 83 | 82,-39,49,1,-0.25390129482746,0.038192279633135,254992.96653295
 84 | 83,-36.8,201,5,-0.25089377653599,0.034228633511811,258160.73915601
 85 | 84,0,32,0,-0.25882938551903,0.035521016081795,261648.22523189
 86 | 85,-31,71,1,-0.26667385280132,0.036877594493562,264815.68217468
 87 | 86,-36.142857142857,266,7,-0.26922864258289,0.035662436407525,268298.59088683
 88 | 87,-36,245,6,-0.26433752679825,0.038297658568248,271804.28529191
 89 | 88,-30.875,253,8,-0.26904065239429,0.038530012675561,274966.89393377
 90 | 89,0,12,0,-0.2529178994894,0.03857961513754,278135.46529984
 91 | 90,0,30,0,-0.25320659327507,0.036184652519412,281300.25560284
 92 | 91,-35,257,7,-0.2507867937088,0.037512870031409,284785.61747599
 93 | 92,-37.857142857143,279,7,-0.24493289411068,0.038173165585846,288264.99663019
 94 | 93,0,8,0,-0.28029315757751,0.036752135819988,291439.21841502
 95 | 94,0,11,0,-0.28757838642597,0.037329511399381,294786.05089426
 96 | 95,0,37,0,-0.26499050378799,0.03641091414704,298514.9797523
 97 | 96,0,14,0,-0.27337732851505,0.037253945605364,302185.86876249
 98 | 97,-33.5,91,2,-0.26746787285805,0.03588946757419,305871.62223148
 99 | 98,0,7,0,-0.27868657195568,0.03872905414924,309507.54011035
100 | 


--------------------------------------------------------------------------------
/plots/Pong2Player_history_B.csv:
--------------------------------------------------------------------------------
  1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds
  2 | 1,-26.458333333333,654,24,-0.10316917291284,0.03171840660274,0
  3 | 2,-24.260869565217,571,23,-0.21664407390356,0.020301910638809,2219.7107670307
  4 | 3,-30.823529411765,535,17,-0.31796842944622,0.031760104954243,5234.8550970554
  5 | 4,-29.375,719,24,-0.39784982442856,0.03718723988533,8274.6327157021
  6 | 5,-27.578947368421,542,19,-0.49098414742947,0.046165034890175,11343.717128754
  7 | 6,-27.289473684211,1038,38,-0.54109015369415,0.054801562190056,14424.585474968
  8 | 7,-28.514285714286,1008,35,-0.68634745073318,0.046342434048653,17527.729490757
  9 | 8,-22.910714285714,1294,56,-0.78734533989429,0.052283799111843,20646.920996666
 10 | 9,-26.333333333333,573,21,-0.96680954873562,0.050795489311218,23793.418364763
 11 | 10,-21.958333333333,537,24,-0.94446024823189,0.047748475790024,26948.615488768
 12 | 11,-28.705882352941,508,17,-0.90576515161991,0.046088018536568,30113.221090794
 13 | 12,-31.333333333333,98,3,-0.88200869899988,0.049653822243214,33269.140145779
 14 | 13,-30.9375,505,16,-0.8679031342864,0.052557094037533,36434.461924553
 15 | 14,-28.705882352941,507,17,-0.87953285455704,0.05192302185297,39919.898360729
 16 | 15,-28.705882352941,507,17,-0.79656422615051,0.055768653512001,43082.808995724
 17 | 16,-36.818181818182,419,11,-0.7973798545599,0.053919546604156,46241.313314676
 18 | 17,-37,394,10,-0.79324904960394,0.056587139368057,49401.823321819
 19 | 18,-36.454545454545,417,11,-0.75869192743301,0.056097362339497,52556.488364697
 20 | 19,-36.181818181818,403,11,-0.72147205990553,0.056252910256386,55721.547684669
 21 | 20,-34.727272727273,391,11,-0.69017216366529,0.050779387712479,58879.465076685
 22 | 21,-36.666666666667,366,9,-0.70132182663679,0.048130309581757,62042.817330837
 23 | 22,-30.636363636364,359,11,-0.68402341276407,0.047746060490608,65203.508159876
 24 | 23,-34,365,10,-0.62059300380945,0.047794467508793,68371.994807005
 25 | 24,-37.888888888889,351,9,-0.56811383879185,0.044462095856667,71532.411481619
 26 | 25,-34.555555555556,334,9,-0.54428851336241,0.049049448788166,74696.672069788
 27 | 26,-35.666666666667,341,9,-0.53194153410196,0.042404607594013,77855.706784964
 28 | 27,-33.777777777778,331,9,-0.52397324371338,0.044501958101988,81024.021785021
 29 | 28,-37.125,308,8,-0.49280105501413,0.043603889077902,84182.163150072
 30 | 29,-32.818181818182,388,11,-0.45164683359861,0.048712482556701,87344.461227179
 31 | 30,-34,317,9,-0.45899818623066,0.046882577434182,90501.760588169
 32 | 31,-35.111111111111,334,9,-0.42163554221392,0.041218542262912,93669.918260336
 33 | 32,-33.7,345,10,-0.41124828916788,0.042786059759557,96827.402169228
 34 | 33,-37.875,320,8,-0.36733940595388,0.042581390786916,99991.178019285
 35 | 34,-34.75,300,8,-0.36165858471394,0.041615742020309,103149.88645959
 36 | 35,-35.142857142857,273,7,-0.36283807462454,0.04286250728555,106317.52060747
 37 | 36,-35,316,9,-0.35213853293657,0.04135294483602,109477.61490846
 38 | 37,-31.444444444444,304,9,-0.37873019325733,0.039976343035698,112644.04946756
 39 | 38,-35,325,9,-0.37086350005865,0.045977050364017,115803.35801244
 40 | 39,-34,308,9,-0.387158010602,0.04341797137633,118968.0033195
 41 | 40,-36.625,309,8,-0.38804112797976,0.041255482595414,122125.33657956
 42 | 41,-38.142857142857,292,7,-0.37829089355469,0.042000484826975,125288.71097755
 43 | 42,-34.25,295,8,-0.41269392293692,0.043908545061946,128449.8854835
 44 | 43,-36.428571428571,284,7,-0.35951180899143,0.040556112715974,131617.50346541
 45 | 44,-36.875,297,8,-0.32541014826298,0.040843158771982,134780.73201251
 46 | 45,-34.875,289,8,-0.34633871275187,0.040638292525196,137949.28987861
 47 | 46,-33.75,277,8,-0.32167219752073,0.040835269892123,141109.40660763
 48 | 47,-35.25,289,8,-0.33032851690054,0.040877664363012,144275.81889558
 49 | 48,-35.25,307,8,-0.33778543055058,0.040645833931863,147435.31064677
 50 | 49,-39.571428571429,282,7,-0.35659436476231,0.042142257492058,150603.13879275
 51 | 50,-36,273,7,-0.34156268209219,0.038646750929765,153765.27083468
 52 | 51,-33.714285714286,267,7,-0.33958718246222,0.04115854826197,156934.99299765
 53 | 52,-36.428571428571,279,7,-0.33137782621384,0.040307806527708,160095.28643036
 54 | 53,-34.875,289,8,-0.30994284754992,0.041334426323883,163261.62074041
 55 | 54,-35.75,288,8,-0.29656387126446,0.040181493939599,166420.72678161
 56 | 55,-36.142857142857,276,7,-0.29876182216406,0.041037688679993,169588.44667172
 57 | 56,-36.714285714286,286,7,-0.30332996743917,0.040273162252735,172748.50242591
 58 | 57,-37.285714285714,285,7,-0.31741343575716,0.03937012092676,175910.684515
 59 | 58,-35.428571428571,270,7,-0.32956989979744,0.042279763463885,179070.61611986
 60 | 59,-36.285714285714,279,7,-0.33301015365124,0.04055048728548,182233.66440177
 61 | 60,-34.25,298,8,-0.3283772597909,0.039634348405059,185392.0522368
 62 | 61,-38,279,7,-0.32743104177713,0.043790440815501,188557.70815563
 63 | 62,-36.142857142857,286,7,-0.32915987682343,0.04203778380272,191718.52969551
 64 | 63,-33.875,295,8,-0.36024566584826,0.042172495619394,194886.45836949
 65 | 64,-31,261,8,-0.35764328289032,0.040637518398464,198048.08661747
 66 | 65,-37.857142857143,280,7,-0.34844648963213,0.040825937362388,201217.75610328
 67 | 66,-37.714285714286,284,7,-0.33217490136623,0.040230352470651,204377.43602824
 68 | 67,-36.714285714286,273,7,-0.32567139828205,0.041850845268928,207544.57766247
 69 | 68,-36.428571428571,287,7,-0.29867742705345,0.03995036158897,210706.8551352
 70 | 69,-36,273,7,-0.27895277392864,0.041134778410196,213871.46873212
 71 | 70,-35.5,244,6,-0.28795940876007,0.041708505763207,217029.25653219
 72 | 71,-36.428571428571,266,7,-0.29409499913454,0.037932520796545,220194.61400127
 73 | 72,-35,275,7,-0.30484864252806,0.036504096915596,223354.55645609
 74 | 73,-32.5,275,8,-0.30799613463879,0.043229171217419,226520.75250101
 75 | 74,-36.285714285714,265,7,-0.29752028590441,0.043337372859009,229680.09711981
 76 | 75,-32.125,291,8,-0.30138940262794,0.044016310835257,232846.38811898
 77 | 76,-33.571428571429,266,7,-0.29435864454508,0.043229652024806,236005.74047184
 78 | 77,-35.666666666667,239,6,-0.31954347169399,0.040958518084604,239172.54759908
 79 | 78,-39.333333333333,269,6,-0.30283958315849,0.042423107903916,242333.03757191
 80 | 79,-32.375,276,8,-0.29783964937925,0.039569395420607,245498.53613424
 81 | 80,-36.428571428571,270,7,-0.27939842480421,0.041614939747378,248660.04212308
 82 | 81,-35.571428571429,267,7,-0.27845881319046,0.041564103236422,251829.45792484
 83 | 82,-39,49,1,-0.29774576640129,0.041946764576249,254992.96653295
 84 | 83,-36.8,201,5,-0.29520063459873,0.041044882636517,258160.73915601
 85 | 84,0,32,0,-0.30711828100681,0.03780022122385,261648.22523189
 86 | 85,-31,71,1,-0.28975769090652,0.041091928943992,264815.68217468
 87 | 86,-36.142857142857,266,7,-0.27943712234497,0.038912671593949,268298.59088683
 88 | 87,-36,245,6,-0.25322439110279,0.039827098444104,271804.28529191
 89 | 88,-30.875,253,8,-0.28580475902557,0.040129517577589,274966.89393377
 90 | 89,0,12,0,-0.30222401273251,0.040915508701699,278135.46529984
 91 | 90,0,30,0,-0.31114657723904,0.040820374908857,281300.25560284
 92 | 91,-35,257,7,-0.30451617670059,0.039868107420392,284785.61747599
 93 | 92,-37.857142857143,279,7,-0.32563604104519,0.038739177445881,288264.99663019
 94 | 93,0,8,0,-0.34277233123779,0.041662729106494,291439.21841502
 95 | 94,0,11,0,-0.34442797040939,0.038347307886463,294786.05089426
 96 | 95,0,37,0,-0.32848795676231,0.040192691942677,298514.9797523
 97 | 96,0,14,0,-0.31607774305344,0.038031559703173,302185.86876249
 98 | 97,-33.5,91,2,-0.32555533874035,0.040140081637539,305871.62223148
 99 | 98,0,7,0,-0.30362955224514,0.039050936597399,309507.54011035
100 | 


--------------------------------------------------------------------------------
/plots/Pong2Player_meanq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_meanq.png


--------------------------------------------------------------------------------
/plots/Pong2Player_reward_counts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_reward_counts.png


--------------------------------------------------------------------------------
/plots/Pong2Player_tderror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_tderror.png


--------------------------------------------------------------------------------
/plots/Pong2Player_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_time.png


--------------------------------------------------------------------------------
/plots/plot.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | import matplotlib as mpl
  5 | # Reads the semicolon-delimited CSV files below and saves per-epoch history plots as PNG files (uses the Python 2 reader.next() API).
  6 | csv_files = [
  7 |   "Pong2Player.csv",
  8 |   "Pong2PlayerVS.csv",
  9 | #  "Pong2Player05p.csv",
 10 | ]
 11 | # Legend labels, one per entry of csv_files, in the same order.
 12 | labels = [
 13 |   "Cooperative",
 14 |   "Competitive",
 15 | #  "rho = 0.5",
 16 | ]
 17 | # File-name suffixes for the per-file figures, one per entry of csv_files.
 18 | suffixes = [
 19 |   "cooperative",
 20 |   "competitive",
 21 | #  "05p",
 22 | ]
 23 | # data[i] maps epoch -> list of (sidebounces/points, wallbounces ratio, servingtime/points/4) tuples for csv_files[i].
 24 | data = []
 25 | for i, file_name in enumerate(csv_files):
 26 |   data.append(dict())
 27 |   with open(file_name, 'rb') as csv_file:
 28 |     csv_reader = csv.reader(csv_file, delimiter=";")
 29 |     csv_reader.next() # skip first row
 30 |     for row in csv_reader:
 31 |       if not row[0].isdigit():  # ignore rows whose first field is not a plain epoch number
 32 |         continue
 33 |       epoch = int(row[0])
 34 |       if epoch not in data[i].keys():
 35 |         data[i][epoch] = []
 36 |       wallbounces = float(row[2])
 37 |       sidebounces = float(row[3])
 38 |       points = float(row[4])
 39 |       servingtime = float(row[5])
 40 |       data[i][epoch].append((
 41 |         sidebounces / points,  # NOTE(review): raises ZeroDivisionError if points is 0 - confirm the data never has that
 42 |         wallbounces / (wallbounces if sidebounces == 0 else sidebounces),  # divides by wallbounces itself when there are no sidebounces
 43 |         servingtime / points / 4  # NOTE(review): the meaning of the constant 4 is not documented here - confirm
 44 |       ))
 45 | # For each file: the sorted epochs plus the per-epoch mean and standard deviation of the three metrics over all rows of that epoch.
 46 | epochs = []
 47 | means = []
 48 | stds = []
 49 | for i, file_name in enumerate(csv_files):
 50 |   epochs.append([])
 51 |   means.append([])
 52 |   stds.append([])
 53 |   for epoch in sorted(data[i].keys()):
 54 |     epochs[i].append(int(epoch))
 55 |     a = np.array(data[i][epoch])
 56 |     mean = np.mean(a, axis=0)
 57 |     means[i].append(mean)
 58 |     std = np.std(a, axis=0)
 59 |     stds[i].append(std)
 60 | # Convert to arrays so a single metric can be sliced out as means[i][:, k].
 61 | epochs = [np.array(a) for a in epochs]
 62 | means = [np.array(a) for a in means]
 63 | stds = [np.array(a) for a in stds]
 64 | # Matplotlib styling shared by every figure below.
 65 | mpl.rcParams['lines.linewidth'] = 2
 66 | mpl.rcParams['xtick.labelsize'] = 'small'
 67 | mpl.rcParams['ytick.labelsize'] = 'small'
 68 | mpl.rcParams['axes.labelsize'] = 'small'
 69 | mpl.rcParams['legend.fontsize'] = 'small'
 70 | 
 71 | dpi = 300
 72 | plt.figure(figsize=(4,3))
 73 | # Figure 1: metric column 1 (wall-bounce ratio) for every file, leaving out the first 10 epoch entries.
 74 | for i, file_name in enumerate(csv_files):
 75 |   #plt.errorbar(epochs[i][10:], means[i][10:,1], yerr=stds[i][10:,1])
 76 |   plt.plot(epochs[i][10:], means[i][10:,1])
 77 | plt.locator_params(axis='y', nbins=5)
 78 | plt.ylabel("Wall-bounces per paddle-bounce")
 79 | plt.xlabel("Epoch")
 80 | lgd = plt.legend(labels, loc="lower center", bbox_to_anchor=(0.43, 1.),
 81 |            ncol=2, columnspacing=1, frameon=False)
 82 | plt.tight_layout()
 83 | plt.savefig('wallbounces_history.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
 84 | # Figure 2: metric column 0 on a broken y-axis - ax1 and ax2 plot the same series but are limited to 16..400 and 0..11.
 85 | plt.clf()
 86 | #f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(4,3))
 87 | #ax = plt.subplot2grid((2,2), (0,0), rowspan=2)
 88 | #ax1 = plt.subplot2grid((2,2), (0,1))
 89 | #ax2 = plt.subplot2grid((2,2), (1,1))
 90 | f = plt.figure(figsize=(4,3))
 91 | ax = f.add_subplot(111)    # The big subplot
 92 | ax1 = f.add_subplot(211)
 93 | ax2 = f.add_subplot(212)
 94 | for i, file_name in enumerate(csv_files):
 95 |   #plt.errorbar(epochs[i], means[i][:,1], yerr=stds[i][:,1])
 96 |   ax1.plot(epochs[i], means[i][:,0])
 97 |   ax2.plot(epochs[i], means[i][:,0])
 98 | 
 99 | ax1.set_ylim(16, 400)
100 | ax2.set_ylim(0, 11)
101 | ax1.spines['bottom'].set_visible(False)
102 | ax2.spines['top'].set_visible(False)
103 | ax1.xaxis.tick_top()
104 | ax1.tick_params(labeltop='off')  # don't put tick labels at the top; NOTE(review): newer matplotlib may expect False instead of 'off' - confirm
105 | ax2.xaxis.tick_bottom()
106 | ax1.locator_params(axis='y', nbins=3)
107 | ax2.locator_params(axis='y', nbins=3)
108 | 
109 | d = .015  # how big to make the diagonal lines in axes coordinates
110 | # arguments to pass plot, just so we don't keep repeating them
111 | kwargs = dict(transform=ax1.transAxes, color='k', clip_on=False)
112 | ax1.plot((-d, +d), (-d, +d), **kwargs)        # top-left diagonal
113 | ax1.plot((1 - d, 1 + d), (-d, +d), **kwargs)  # top-right diagonal
114 | 
115 | kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
116 | ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)  # bottom-left diagonal
117 | ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # bottom-right diagonal
118 | # Hide the spines and ticks of the big subplot; it is only used to carry the shared axis labels.
119 | ax.spines['top'].set_color('none')
120 | ax.spines['bottom'].set_color('none')
121 | ax.spines['left'].set_color('none')
122 | ax.spines['right'].set_color('none')
123 | ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off')
124 | 
125 | ax.set_ylabel("Paddle-bounces per point", labelpad=10)
126 | ax.set_xlabel("Epoch")
127 | #ax = plt.gca()
128 | #ax.set_yscale("log")
129 | lgd = ax1.legend(labels, loc="lower center", bbox_to_anchor=(0.43, 1.),
130 |            ncol=2, columnspacing=1, frameon=False)
131 | plt.tight_layout()
132 | f.subplots_adjust(hspace=0.1)
133 | plt.savefig('sidebounces_history.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
134 | # Figure 3: metric column 2 (serving time per point) for every file.
135 | plt.clf()
136 | for i, file_name in enumerate(csv_files):
137 |   #plt.errorbar(epochs[i], means[i][:,2], yerr=stds[i][:,2])
138 |   plt.plot(epochs[i], means[i][:,2])
139 | plt.locator_params(axis='y', nbins=5)
140 | plt.ylabel("Serving time per point")
141 | plt.xlabel("Epoch")
142 | lgd = plt.legend(labels, loc="lower center", bbox_to_anchor=(0.41, 1.),
143 |            ncol=2, columnspacing=1, frameon=False)
144 | plt.tight_layout()
145 | plt.savefig('serving_time_history.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
146 | # Single-file versions of the three figures (no legend), saved with the matching entry of suffixes.
147 | for i, file_name in enumerate(csv_files):
148 |   plt.clf()
149 |   plt.plot(epochs[i][10:], means[i][10:,1])
150 |   plt.locator_params(axis='y', nbins=5)
151 |   plt.ylabel("Wall-bounces per paddle-bounce")
152 |   plt.xlabel("Epoch")
153 |   plt.tight_layout()
154 |   plt.savefig('wallbounces_history_%s.png' % suffixes[i], dpi=dpi)
155 | 
156 |   plt.clf()
157 |   plt.plot(epochs[i], means[i][:,0])
158 |   plt.locator_params(axis='y', nbins=5)
159 |   plt.ylabel("Paddle-bounces per point")
160 |   plt.xlabel("Epoch")
161 |   #ax = plt.gca()
162 |   #ax.set_yscale("log")
163 |   #plt.ylim(ymin=-10)
164 |   plt.tight_layout()
165 |   plt.savefig('sidebounces_history_%s.png' % suffixes[i], dpi=dpi)
166 | 
167 |   plt.clf()
168 |   plt.plot(epochs[i], means[i][:,2])
169 |   plt.locator_params(axis='y', nbins=5)
170 |   plt.ylabel("Serving time per point")
171 |   plt.xlabel("Epoch")
172 |   plt.tight_layout()
173 |   plt.savefig('serving_time_history_%s.png' % suffixes[i], dpi=dpi)
174 | 


--------------------------------------------------------------------------------
/plots/plot_history.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import matplotlib as mpl
 5 | 
 6 | csv_files = [
 7 |   "Pong2PlayerVS", 
 8 |   "Pong2Player075p",
 9 |   "Pong2Player05p",
10 |   "Pong2Player025p",
11 |   "Pong2Player0",
12 |   "Pong2Player025",
13 |   "Pong2Player05",
14 |   "Pong2Player075",
15 |   "Pong2Player",
16 | ]
17 | 
18 | mpl.rcParams['lines.linewidth'] = 2
19 | mpl.rcParams['xtick.labelsize'] = 'small'
20 | mpl.rcParams['ytick.labelsize'] = 'small'
21 | mpl.rcParams['axes.labelsize'] = 'small'
22 | mpl.rcParams['legend.fontsize'] = 'small'
23 | 
24 | dpi = 300
25 | plt.figure(figsize=(4,3))
26 | 
27 | data = []
28 | for i, file_name in enumerate(csv_files):
29 |   print file_name
30 |   data_a = np.loadtxt(file_name + "_history_A.csv", delimiter = ",", skiprows = 1)
31 |   data_b = np.loadtxt(file_name + "_history_B.csv", delimiter = ",", skiprows = 1)
32 | 
33 |   plt.clf()
34 |   plt.plot(data_a[:,0] / 2, data_a[:,1])
35 |   plt.plot(data_b[:,0] / 2, data_b[:,1])
36 |   plt.ylabel("Average reward")
37 |   plt.xlabel("Epoch")
38 |   lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.),
39 |            ncol=2, columnspacing=1, frameon=False)
40 |   #plt.tight_layout()
41 |   plt.savefig(file_name + '_average_reward.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
42 | 
43 |   plt.clf()
44 |   plt.plot(data_a[:,0] / 2, data_a[:,2])
45 |   plt.plot(data_b[:,0] / 2, data_b[:,2])
46 |   plt.ylabel("Reward count")
47 |   plt.xlabel("Epoch")
48 |   lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.),
49 |            ncol=2, columnspacing=1, frameon=False)
50 |   #plt.tight_layout()
51 |   plt.savefig(file_name + '_reward_counts.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
52 | 
53 |   plt.clf()
54 |   plt.plot(data_a[:,0] / 2, data_a[:,3])
55 |   plt.plot(data_b[:,0] / 2, data_b[:,3])
56 |   plt.ylabel("Episode count")
57 |   plt.xlabel("Epoch")
58 |   lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.),
59 |            ncol=2, columnspacing=1, frameon=False)
60 |   #plt.tight_layout()
61 |   plt.savefig(file_name + '_episode_counts.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
62 | 
63 |   plt.clf()
64 |   plt.plot(data_a[:,0] / 2, data_a[:,4])
65 |   plt.plot(data_b[:,0] / 2, data_b[:,4])
66 |   plt.ylabel("Mean Q-value")
67 |   plt.xlabel("Epoch")
68 |   lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.),
69 |            ncol=2, columnspacing=1, frameon=False)
70 |   #plt.tight_layout()
71 |   plt.savefig(file_name + '_meanq.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
72 | 
73 |   plt.clf()
74 |   plt.plot(data_a[:,0] / 2, data_a[:,5])
75 |   plt.plot(data_b[:,0] / 2, data_b[:,5])
76 |   plt.ylabel("TD error")
77 |   plt.xlabel("Epoch")
78 |   lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.),
79 |            ncol=2, columnspacing=1, frameon=False)
80 |   #plt.tight_layout()
81 |   plt.savefig(file_name + '_tderror.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
82 | 
83 |   plt.clf()
84 |   plt.plot(data_a[:,0] / 2, data_a[:,6])
85 |   plt.plot(data_b[:,0] / 2, data_b[:,6])
86 |   plt.ylabel("Time (seconds)")
87 |   plt.xlabel("Epoch")
88 |   lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.),
89 |            ncol=2, columnspacing=1, frameon=False)
90 |   #plt.tight_layout()
91 |   plt.savefig(file_name + '_time.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')
92 | 


--------------------------------------------------------------------------------
/plots/scatter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import csv
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | import matplotlib as mpl
  7 | 
  8 | csv_files = [
  9 |   ("Pong2PlayerVS.csv", "Competitive $\rho=1$"), 
 10 |   ("Pong2Player075p.csv", "Transition $\rho=0.75$"),
 11 |   ("Pong2Player05p.csv", "Transition $\rho=0.5$"),
 12 |   ("Pong2Player025p.csv", "Transition $\rho=0.25$"),
 13 |   ("Pong2Player0.csv", "Transition $\rho=0$"),
 14 |   ("Pong2Player025.csv", "Transition $\rho=-0.25$"), 
 15 |   ("Pong2Player05.csv", "Transition $\rho=-0.5$"), 
 16 |   ("Pong2Player075.csv", "Transition $\rho=-0.75$"),
 17 |   ("Pong2Player.csv", "Cooperative $\rho=-1$"),
 18 | ]
 19 | 
 20 | labels = [
 21 |   r'$\rho = 1$' + '\n(competitive)',
 22 |   r'$0.75$',
 23 |   r'$0.5$',
 24 |   r'$0.25$',
 25 |   r'$0$',
 26 |   r'$-0.25$',
 27 |   r'$-0.5$',
 28 |   r'$-0.75$',
 29 |   r'$-1$' + '\n(cooperative)',
 30 | ]
 31 | 
 32 | sideBouncePerPoint = []
 33 | wallBouncePerSideBounce = []
 34 | avgServingTime = []
 35 | for csv_file, label in csv_files:
 36 |   with open(csv_file, 'rb') as input:
 37 |     csv_reader = csv.reader(input, delimiter=";")
 38 |     csv_reader.next() # skip first row
 39 | 
 40 |     sideBounce = []
 41 |     wallBounce = []
 42 |     points = []
 43 |     servingTime = []
 44 | 
 45 |     for values in csv_reader:
 46 |       if not values[0].isdigit():
 47 |         continue
 48 |       if int(values[0]) != 49:
 49 |         continue
 50 | 
 51 |       wallBounce.append(float(values[2]))
 52 |       sideBounce.append(float(values[3]))
 53 |       points.append(float(values[4]))
 54 |       servingTime.append(float(values[5]))
 55 | 
 56 |     sideBouncePerPoint.append(map(lambda x, y: x / y, sideBounce, points))
 57 |     wallBouncePerSideBounce.append(map(lambda x, y: x / y, wallBounce, sideBounce))
 58 |     avgServingTime.append(map(lambda x, y: x / y / 4, servingTime, points))
 59 | 
 60 | sideBouncePerPoint = np.array(sideBouncePerPoint)
 61 | wallBouncePerSideBounce = np.array(wallBouncePerSideBounce)
 62 | avgServingTime = np.array(avgServingTime)
 63 | x = range(sideBouncePerPoint.shape[0]) * sideBouncePerPoint.shape[1]
 64 | #x += 0.02 * np.random.randn(len(x))
 65 | 
 66 | mpl.rcParams['lines.linewidth'] = 2
 67 | mpl.rcParams['xtick.labelsize'] = 'small'
 68 | mpl.rcParams['ytick.labelsize'] = 'small'
 69 | mpl.rcParams['axes.labelsize'] = 'small'
 70 | mpl.rcParams['legend.fontsize'] = 'small'
 71 | 
 72 | dpi = 300
 73 | plt.figure(figsize=(4.6,3.45))
 74 | 
 75 | plt.scatter(x, sideBouncePerPoint.T.ravel(), alpha = 0.5)
 76 | plt.ylabel("Paddle-bounces per point")
 77 | #ax = plt.gca()
 78 | #ax.set_yscale("log")
 79 | plt.xticks(x, labels)
 80 | plt.locator_params(axis='y', nbins=7)
 81 | plt.xlim((-1, sideBouncePerPoint.shape[0]))
 82 | plt.ylim(ymin=-400)
 83 | plt.tight_layout()
 84 | plt.savefig('sidebounces_per_point_scatter.png', dpi=dpi)
 85 | 
 86 | plt.clf()
 87 | plt.scatter(x, wallBouncePerSideBounce.T.ravel(), alpha = 0.5)
 88 | plt.ylabel("Wall-bounces per paddle-bounce")
 89 | #ax = plt.gca()
 90 | #ax.set_yscale("log")
 91 | plt.xticks(x, labels)
 92 | plt.locator_params(axis='y', nbins=7)
 93 | plt.xlim((-1, wallBouncePerSideBounce.shape[0]))
 94 | plt.ylim(ymin=-0.1)
 95 | plt.tight_layout()
 96 | plt.savefig('wallbounces_per_sidebounce_scatter.png', dpi=dpi)
 97 | 
 98 | plt.clf()
 99 | plt.scatter(x, avgServingTime.T.ravel(), alpha = 0.5)
100 | plt.ylabel("Serving time per point")
101 | #ax = plt.gca()
102 | #ax.set_yscale("log")
103 | plt.xticks(x, labels)
104 | plt.locator_params(axis='y', nbins=7)
105 | plt.xlim((-1, avgServingTime.shape[0]))
106 | plt.ylim(ymin=-100)
107 | plt.tight_layout()
108 | plt.savefig('serving_time_per_point_scatter.png', dpi=dpi)
109 | 


--------------------------------------------------------------------------------
/plots/serving_time_history.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_history.png


--------------------------------------------------------------------------------
/plots/serving_time_history_competitive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_history_competitive.png


--------------------------------------------------------------------------------
/plots/serving_time_history_cooperative.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_history_cooperative.png


--------------------------------------------------------------------------------
/plots/serving_time_per_point.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_per_point.png


--------------------------------------------------------------------------------
/plots/serving_time_per_point_scatter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_per_point_scatter.png


--------------------------------------------------------------------------------
/plots/sidebounces_history.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_history.png


--------------------------------------------------------------------------------
/plots/sidebounces_history_competitive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_history_competitive.png


--------------------------------------------------------------------------------
/plots/sidebounces_history_cooperative.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_history_cooperative.png


--------------------------------------------------------------------------------
/plots/sidebounces_per_point.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_per_point.png


--------------------------------------------------------------------------------
/plots/sidebounces_per_point_scatter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_per_point_scatter.png


--------------------------------------------------------------------------------
/plots/stats.tex:
--------------------------------------------------------------------------------
 1 | Competitive $\rho=1$ & $ 7.15 \pm 1.01 $ & $ 0.87 \pm 0.08 $ & $ 113.87 \pm 40.30 $ \\
 2 | Transition $\rho=0.75$ & $ 7.58 \pm 0.71 $ & $ 0.83 \pm 0.06 $ & $ 129.03 \pm 38.81 $ \\
 3 | Transition $\rho=0.5$ & $ 6.93 \pm 0.49 $ & $ 0.64 \pm 0.03 $ & $ 147.69 \pm 41.02 $ \\
 4 | Transition $\rho=0.25$ & $ 4.49 \pm 0.43 $ & $ 1.11 \pm 0.07 $ & $ 275.90 \pm 38.69 $ \\
 5 | Transition $\rho=0$ & $ 4.31 \pm 0.25 $ & $ 0.78 \pm 0.05 $ & $ 407.64 \pm 100.79 $ \\
 6 | Transition $\rho=-0.25$ & $ 5.21 \pm 0.36 $ & $ 0.60 \pm 0.05 $ & $ 449.18 \pm 99.53 $ \\
 7 | Transition $\rho=-0.5$ & $ 6.20 \pm 0.20 $ & $ 0.38 \pm 0.04 $ & $ 433.39 \pm 98.77 $ \\
 8 | Transition $\rho=-0.75$ & $ 409.50 \pm 535.24 $ & $ 0.02 \pm 0.01 $ & $ 591.62 \pm 302.15 $ \\
 9 | Cooperative $\rho=-1$ & $ 654.66 \pm 542.67 $ & $ 0.01 \pm 0.00 $ & $ 393.34 \pm 138.63 $ \\
10 | 


--------------------------------------------------------------------------------
/plots/table.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import csv
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | import matplotlib as mpl
  7 | 
  8 | csv_files = [
  9 |   ("Pong2PlayerVS.csv", "Competitive $\\rho=1$"), 
 10 |   ("Pong2Player075p.csv", "Transition $\\rho=0.75$"),
 11 |   ("Pong2Player05p.csv", "Transition $\\rho=0.5$"),
 12 |   ("Pong2Player025p.csv", "Transition $\\rho=0.25$"),
 13 |   ("Pong2Player0.csv", "Transition $\\rho=0$"),
 14 |   ("Pong2Player025.csv", "Transition $\\rho=-0.25$"), 
 15 |   ("Pong2Player05.csv", "Transition $\\rho=-0.5$"), 
 16 |   ("Pong2Player075.csv", "Transition $\\rho=-0.75$"),
 17 |   ("Pong2Player.csv", "Cooperative $\\rho=-1$"),
 18 | ]
 19 | 
 20 | labels = [
 21 |   r'$\rho = 1$' + '\n(competitive)',
 22 |   r'$0.75$',
 23 |   r'$0.5$',
 24 |   r'$0.25$',
 25 |   r'$0$',
 26 |   r'$-0.25$',
 27 |   r'$-0.5$',
 28 |   r'$-0.75$',
 29 |   r'$-1$' + '\n(cooperative)',
 30 | ]
 31 | 
 32 | stats = []
 33 | with open("stats.tex","w") as output: 
 34 |     #output.write("\hline\n")
 35 |     #output.write("Agent & Average paddle-bounces per point & Average wall-bounces per paddle-bounce & Average serving time per point \\\\\n")
 36 |     #output.write("\hline\n")
 37 |     for csv_file, label in csv_files:
 38 |         with open(csv_file, 'rb') as input:
 39 |           csv_reader = csv.reader(input, delimiter=";")
 40 |           csv_reader.next() # skip first row
 41 | 
 42 |           sideBounce = []
 43 |           wallBounce = []
 44 |           points = []
 45 |           servingTime = []
 46 | 
 47 |           for values in csv_reader:
 48 |               if not values[0].isdigit():
 49 |                 continue
 50 |               if int(values[0]) != 49:
 51 |                 continue
 52 | 
 53 |               wallBounce.append(float(values[2]))
 54 |               sideBounce.append(float(values[3]))
 55 |               points.append(float(values[4]))
 56 |               servingTime.append(float(values[5]))
 57 | 
 58 |           sideBouncePerPoint = map(lambda x, y: x / y, sideBounce, points)
 59 |           wallBouncePerSideBounce = map(lambda x, y: x / y, wallBounce, sideBounce)
 60 |           avgServingTime = map(lambda x, y: x / y / 4, servingTime, points)
 61 | 
 62 |           data = (label, np.mean(sideBouncePerPoint), np.std(sideBouncePerPoint),
 63 |               np.mean(wallBouncePerSideBounce), np.std(wallBouncePerSideBounce),
 64 |               np.mean(avgServingTime), np.std(avgServingTime))
 65 |           stats.append(data[1:])
 66 | 
 67 |           output.write("%s & $ %.2f \pm %.2f $ & $ %.2f \pm %.2f $ & $ %.2f \pm %.2f $ \\\\\n" % data)
 68 |           #output.write("\hline\n")
 69 | 
 70 | stats = np.array(stats)
 71 | x = range(1, stats.shape[0] + 1)
 72 | 
 73 | mpl.rcParams['lines.linewidth'] = 2
 74 | mpl.rcParams['xtick.labelsize'] = 'small'
 75 | mpl.rcParams['ytick.labelsize'] = 'small'
 76 | mpl.rcParams['axes.labelsize'] = 'small'
 77 | mpl.rcParams['legend.fontsize'] = 'small'
 78 | 
 79 | dpi = 300
 80 | plt.figure(figsize=(4,3))
 81 | 
 82 | plt.errorbar(x, stats[:,0], yerr=stats[:,1])
 83 | plt.ylabel("Paddle-bounces per point")
 84 | #ax = plt.gca()
 85 | #ax.set_yscale("log")
 86 | plt.xticks(x, labels)
 87 | plt.locator_params(axis='y', nbins=5)
 88 | plt.xlim((0, len(x) + 1))
 89 | plt.tight_layout()
 90 | plt.savefig('sidebounces_per_point.png', dpi=dpi)
 91 | 
 92 | plt.clf()
 93 | plt.errorbar(x, stats[:,2], yerr=stats[:,3])
 94 | plt.ylabel("Wall-bounces per paddle-bounce")
 95 | #ax = plt.gca()
 96 | #ax.set_yscale("log")
 97 | plt.xticks(x, labels)
 98 | plt.locator_params(axis='y', nbins=5)
 99 | plt.xlim((0, len(x) + 1))
100 | plt.tight_layout()
101 | plt.savefig('wallbounces_per_sidebounce.png', dpi=dpi)
102 | 
103 | plt.clf()
104 | plt.errorbar(x, stats[:,4], yerr=stats[:,5])
105 | plt.ylabel("Serving time per point")
106 | #ax = plt.gca()
107 | #ax.set_yscale("log")
108 | plt.xticks(x, labels)
109 | plt.locator_params(axis='y', nbins=5)
110 | plt.xlim((0, len(x) + 1))
111 | plt.tight_layout()
112 | plt.savefig('serving_time_per_point.png', dpi=dpi)
113 | 


--------------------------------------------------------------------------------
/plots/wallbounces_history.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_history.png


--------------------------------------------------------------------------------
/plots/wallbounces_history_competitive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_history_competitive.png


--------------------------------------------------------------------------------
/plots/wallbounces_history_cooperative.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_history_cooperative.png


--------------------------------------------------------------------------------
/plots/wallbounces_per_sidebounce.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_per_sidebounce.png


--------------------------------------------------------------------------------
/plots/wallbounces_per_sidebounce_scatter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_per_sidebounce_scatter.png


--------------------------------------------------------------------------------
/roms/Pong2Player.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/Pong2Player0.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/Pong2Player025.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/Pong2Player025p.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/Pong2Player05.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/Pong2Player05p.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/Pong2Player075.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/Pong2Player075p.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/Pong2PlayerVS.bin:
--------------------------------------------------------------------------------
1 | pong.bin


--------------------------------------------------------------------------------
/roms/README:
--------------------------------------------------------------------------------
 1 | ROM files should be placed in this directory.
2 | 


--------------------------------------------------------------------------------
/roms/breakout.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/roms/breakout.bin


--------------------------------------------------------------------------------
/roms/breakout_2player.bin:
--------------------------------------------------------------------------------
1 | breakout.bin


--------------------------------------------------------------------------------
/roms/pong.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/roms/pong.bin


--------------------------------------------------------------------------------
/roms/surround.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/roms/surround.bin


--------------------------------------------------------------------------------
/roms/wizard_of_wor.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/roms/wizard_of_wor.bin


--------------------------------------------------------------------------------
/roms/wizard_of_wor_2player.bin:
--------------------------------------------------------------------------------
1 | wizard_of_wor.bin


--------------------------------------------------------------------------------
/run_cpu:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_cpu breakout "; exit 0
 5 | fi
 6 | ENV=$1
 7 | FRAMEWORK="alewrap"
 8 | 
 9 | game_path=$PWD"/roms/"
10 | env_params="useRGB=true"
11 | agent="NeuralQLearner"
12 | n_replay=1
13 | netfile="\"convnet_atari3\""
14 | update_freq=4
15 | actrep=4
16 | discount=0.99
17 | seed=1
18 | learn_start=50000
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=replay_memory
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y"
29 | state_dim=7056
30 | ncols=1
31 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
32 | steps=50000000
33 | eval_freq=250000
34 | eval_steps=125000
35 | prog_freq=5000
36 | save_freq=125000
37 | save_versions=$save_freq
38 | gpu=-1
39 | random_starts=30
40 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
41 | num_threads=4
42 | 
43 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -steps $steps -eval_freq $eval_freq -eval_steps $eval_steps -prog_freq $prog_freq -save_freq $save_freq -save_versions $save_versions -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads"
44 | echo $args
45 | 
46 | cd dqn
47 | ../torch/bin/qlua train_agent.lua $args
48 | 


--------------------------------------------------------------------------------
/run_gpu:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_gpu breakout "; exit 0
 5 | fi
 6 | ENV=$1
 7 | FRAMEWORK="alewrap"
 8 | 
 9 | game_path=$PWD"/roms/"
10 | env_params="useRGB=true"
11 | agent="NeuralQLearner"
12 | n_replay=1
13 | netfile="\"convnet_atari3\""
14 | update_freq=4
15 | actrep=4
16 | discount=0.99
17 | seed=1
18 | learn_start=50000
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=replay_memory
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y"
29 | state_dim=7056
30 | ncols=1
31 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
32 | steps=50000000
33 | eval_freq=250000
34 | eval_steps=125000
35 | prog_freq=10000
36 | save_freq=125000
37 | save_versions=$save_freq
38 | gpu=0
39 | random_starts=30
40 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
41 | num_threads=4
42 | 
43 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -steps $steps -eval_freq $eval_freq -eval_steps $eval_steps -prog_freq $prog_freq -save_freq $save_freq -save_versions $save_versions -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads"
44 | echo $args
45 | 
46 | cd dqn
47 | ../torch/bin/qlua train_agent.lua $args
48 | 


--------------------------------------------------------------------------------
/run_gpu2:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_gpu breakout "; exit 0
 5 | fi
 6 | ENV=$1
 7 | FRAMEWORK="alewrap"
 8 | 
 9 | game_path=$PWD"/roms/"
10 | env_params="useRGB=true"
11 | agent="NeuralQLearner"
12 | n_replay=1
13 | netfile="\"convnet_atari3\""
14 | update_freq=4
15 | actrep=4
16 | discount=0.99
17 | seed=1
18 | learn_start=50000
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=replay_memory
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y_A"
29 | agent_nameB=$agent_type"_"$1"_FULL_Y_B"
30 | state_dim=7056
31 | ncols=1
32 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
33 | steps=50000000
34 | eval_freq=125000
35 | eval_steps=62500
36 | prog_freq=10000
37 | save_freq=250000
38 | save_versions=$save_freq
39 | gpu=0
40 | gpuB=1
41 | random_starts=30
42 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
43 | num_threads=4
44 | 
45 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -steps $steps -eval_freq $eval_freq -eval_steps $eval_steps -prog_freq $prog_freq -save_freq $save_freq -save_versions $save_versions -actrep $actrep -gpu $gpu -gpuB $gpuB -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 0"
46 | if [ -n "$2" ]; then
47 |   args="$args -network ${agent_name}_$2.t7 -networkB ${agent_nameB}_$2.t7"
48 | fi
49 | 
50 | echo $args
51 | 
52 | cd dqn
53 | ../torch/bin/qlua train_2agent.lua $args
54 | 


--------------------------------------------------------------------------------
/run_gpu2_resume:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_gpu breakout "; exit 0
 5 | fi
 6 | ENV=$1
 7 | FRAMEWORK="alewrap"
 8 | 
 9 | game_path=$PWD"/roms/"
10 | env_params="useRGB=true"
11 | agent="NeuralQLearner"
12 | # number of times to perform learning during each step
13 | n_replay=1
14 | netfile="\"convnet_atari3\""
15 | # perform learning after every 4 steps
16 | update_freq=4
17 | # how many times to repeat chosen action
18 | actrep=4
19 | # future reward discount
20 | discount=0.99
21 | # random seed used to initialize torch
22 | seed=1
23 | # start learning after this steps
24 | learn_start=512
25 | # ?
26 | pool_frms_type="\"max\""
27 | pool_frms_size=2
28 | # not used?
29 | initial_priority="false"
30 | # replay memory size
31 | replay_memory=1000000
32 | # exploration rate in the end
33 | eps_end=0.1
34 | # how many steps decay exploration rate
35 | eps_endt=0
36 | # learning rate
37 | lr=0.00025
38 | agent_type="DQN3_0_1"
39 | preproc_net="\"net_downsample_2x_full_y\""
40 | agent_name=$agent_type"_"$1"_FULL_Y_A"
41 | agent_nameB=$agent_type"_"$1"_FULL_Y_B"
42 | # state dimensionality 84x84
43 | state_dim=7056
44 | # number of color channels (greyscale)
45 | ncols=1
46 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
47 | # how many steps to train
48 | steps=50000000
49 | # testing frequency
50 | eval_freq=125000
51 | # how many steps to test
52 | eval_steps=62500
53 | # frequency of progress output
54 | prog_freq=10000
55 | # save frequency
56 | save_freq=250000
57 | # how often to save versions (will be used for calculating version numbers)
58 | save_versions=$save_freq
59 | # GPU-s to use, first and second player
60 | gpu=0
61 | gpuB=1
62 | random_starts=30
63 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
64 | num_threads=4
65 | 
66 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -steps $steps -eval_freq $eval_freq -eval_steps $eval_steps -prog_freq $prog_freq -save_freq $save_freq -save_versions $save_versions -actrep $actrep -gpu $gpu -gpuB $gpuB -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 0"
67 | if [ -n "$2" ]; then
68 |   args="$args -network ${agent_name}_$2.t7 -networkB ${agent_nameB}_$2.t7"
69 | fi
70 | 
71 | echo $args
72 | 
73 | cd dqn
74 | ../torch/bin/qlua train_2agent.lua $args
75 | 


--------------------------------------------------------------------------------
/test_cpu:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_gpu breakout [<version>]"; exit 0
 5 | fi
 6 | ENV=$1
 7 | FRAMEWORK="alewrap"
 8 | 
 9 | game_path=$PWD"/roms/"
10 | env_params="useRGB=true"
11 | agent="NeuralQLearner"
12 | n_replay=1
13 | netfile="\"convnet_atari3\""
14 | update_freq=4
15 | actrep=4
16 | discount=0.99
17 | seed=1
18 | learn_start=0
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=replay_memory
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y"
29 | if [ -z "$2" ]; then
30 |   network_file="$agent_name.t7"
31 |   gif_file="../sessions/$ENV.gif"
32 |   csv_file="../sessions/$ENV.csv"
33 | else
34 |   network_file="${agent_name}_$2.t7"
35 |   gif_file="../sessions/${ENV}_$2.gif"
36 |   csv_file="../sessions/${ENV}_$2.csv"
37 | fi
38 | state_dim=7056
39 | ncols=1
40 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
41 | gpu=-1
42 | random_starts=30
43 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
44 | num_threads=4
45 | 
46 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -gif_file $gif_file -csv_file $csv_file"
47 | echo $args
48 | 
49 | cd dqn
50 | ../torch/bin/qlua test_agent.lua $args
51 | 


--------------------------------------------------------------------------------
/test_gpu:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_gpu breakout [<version>]"; exit 0
 5 | fi
 6 | ENV=$1
 7 | FRAMEWORK="alewrap"
 8 | 
 9 | game_path=$PWD"/roms/"
10 | env_params="useRGB=true"
11 | agent="NeuralQLearner"
12 | n_replay=1
13 | netfile="\"convnet_atari3\""
14 | update_freq=4
15 | actrep=4
16 | discount=0.99
17 | seed=1
18 | learn_start=0
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=replay_memory
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y"
29 | 
30 | if [ -z "$2" ]; then
31 |   network_file="$agent_name.t7"
32 |   gif_file="../sessions/$ENV.gif"
33 |   csv_file="../sessions/$ENV.csv"
34 | else
35 |   network_file="${agent_name}_$2.t7"
36 |   gif_file="../sessions/${ENV}_$2.gif"
37 |   csv_file="../sessions/${ENV}_$2.csv"
38 | fi
39 | state_dim=7056
40 | ncols=1
41 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
42 | gpu=0
43 | random_starts=30
44 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
45 | num_threads=4
46 | 
47 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -gif_file $gif_file -csv_file $csv_file"
48 | echo $args
49 | 
50 | cd dqn
51 | ../torch/bin/qlua test_agent.lua $args
52 | 


--------------------------------------------------------------------------------
/test_gpu2:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_gpu breakout [<version>] [<seed>]"; exit 0
 5 | fi
 6 | ENV=$1
 7 | FRAMEWORK="alewrap"
 8 | 
 9 | game_path=$PWD"/roms/"
10 | env_params="useRGB=true"
11 | agent="NeuralQLearner"
12 | n_replay=1
13 | netfile="\"convnet_atari3\""
14 | update_freq=4
15 | actrep=4
16 | discount=0.99
17 | seed=3
18 | if [ -z "$3" ]; then
19 |   seed=1
20 | else
21 |   seed=$3
22 |   
23 | fi
24 | learn_start=0
25 | pool_frms_type="\"max\""
26 | pool_frms_size=2
27 | initial_priority="false"
28 | replay_memory=1000000
29 | eps_end=0.1
30 | eps_endt=replay_memory
31 | lr=0.00025
32 | agent_type="DQN3_0_1"
33 | preproc_net="\"net_downsample_2x_full_y\""
34 | agent_name=$agent_type"_"$1"_FULL_Y_A"
35 | agent_nameB=$agent_type"_"$1"_FULL_Y_B"
36 | datas_file="$ENV.csv"
37 | if [ -z "$2" ]; then
38 |   network_file="$agent_name.t7"
39 |   network_fileB="$agent_nameB.t7"
40 |   gif_file="../sessions/$ENV.gif"
41 |   csv_file="../sessions/$ENV.csv"
42 |   version=0
43 | else
44 |   network_file="${agent_name}_$2.t7"
45 |   network_fileB="${agent_nameB}_$2.t7"
46 |   gif_file="../sessions/${ENV}_$2.gif"
47 |   csv_file="../sessions/${ENV}_$2.csv"
48 |   version=$2
49 | fi
50 | state_dim=7056
51 | ncols=1
52 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
53 | gpu=0
54 | random_starts=30
55 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
56 | num_threads=4
57 | cd dqn
58 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -networkB $network_fileB -gif_file $gif_file -csv_file $csv_file -version $version -datas_file $datas_file"
59 |   ../torch/bin/qlua test_2agent.lua $args
60 | 
61 | 


--------------------------------------------------------------------------------
/test_gpu2_seeds:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_gpu breakout [<version>] [<maxseed>]"; exit 0
 5 | fi
 6 | 
 7 | ENV=$1
 8 | FRAMEWORK="alewrap"
 9 | 
10 | game_path=$PWD"/roms/"
11 | env_params="useRGB=true"
12 | agent="NeuralQLearner"
13 | n_replay=1
14 | netfile="\"convnet_atari3\""
15 | update_freq=4
16 | actrep=4
17 | discount=0.99
18 | learn_start=0
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=replay_memory
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y_A"
29 | agent_nameB=$agent_type"_"$1"_FULL_Y_B"
30 | datas_file="$ENV.csv"
31 | if [ -z "$2" ]; then
32 |   network_file="$agent_name.t7"
33 |   network_fileB="$agent_nameB.t7"
34 |   gif_file="../sessions/$ENV.gif"
35 |   csv_file="../sessions/$ENV.csv"
36 |   version=0
37 | else
38 |   network_file="${agent_name}_$2.t7"
39 |   network_fileB="${agent_nameB}_$2.t7"
40 |   gif_file="../sessions/${ENV}_$2.gif"
41 |   csv_file="../sessions/${ENV}_$2.csv"
42 |   version=$2
43 | fi
44 | state_dim=7056
45 | ncols=1
46 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
47 | gpu=0
48 | random_starts=30
49 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
50 | num_threads=4
51 | 
52 | if [ -z "$3" ]; then
53 |   maxseed=10
54 | else
55 |   maxseed=$3
56 | fi
57 | 
58 | cd dqn
59 | for seed in `seq $maxseed`
60 | do
61 |   echo $seed
62 |   args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -networkB $network_fileB -gif_file $gif_file -csv_file $csv_file -version $version -datas_file $datas_file"
63 |   ../torch/bin/qlua test_2agent.lua $args
64 | done
65 | 
66 | 


--------------------------------------------------------------------------------
/test_gpu2_versions:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" ]
 4 |   then echo "Please provide the name of the game, e.g.  ./run_gpu breakout [<maxversion>] [<maxseed>]"; exit 0
 5 | fi
 6 | 
 7 | ENV=$1
 8 | FRAMEWORK="alewrap"
 9 | 
10 | game_path=$PWD"/roms/"
11 | env_params="useRGB=true"
12 | agent="NeuralQLearner"
13 | n_replay=1
14 | netfile="\"convnet_atari3\""
15 | update_freq=4
16 | actrep=4
17 | discount=0.99
18 | learn_start=0
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=replay_memory
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y_A"
29 | agent_nameB=$agent_type"_"$1"_FULL_Y_B"
30 | datas_file="$ENV.csv"
31 | state_dim=7056
32 | ncols=1
33 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
34 | gpu=0
35 | random_starts=30
36 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
37 | num_threads=4
38 | 
39 | if [ -z "$2" ]; then
40 |   maxversion=49
41 | else
42 |   maxversion=$2
43 | fi
44 | 
45 | if [ -z "$3" ]; then
46 |   maxseed=10
47 | else
48 |   maxseed=$3
49 | fi
50 | 
51 | cd dqn
52 | for version in `seq $maxversion`
53 | do
54 |   network_file="${agent_name}_$version.t7"
55 |   network_fileB="${agent_nameB}_$version.t7"
56 |   gif_file="../sessions/${ENV}_$version.gif"
57 |   csv_file="../sessions/${ENV}_$version.csv"
58 | 
59 |     for seed in `seq $maxseed`
60 |     do
61 |       args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -networkB $network_fileB -gif_file $gif_file -csv_file $csv_file -version $version -datas_file $datas_file"
62 |   ../torch/bin/qlua test_2agent.lua $args
63 |     done
64 | done
65 | 
66 | 


--------------------------------------------------------------------------------
/test_schemes:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Run the two-agent test scripts for every Pong2Player* game variant, one
 4 | # after another. Each command runs even if an earlier one failed.
 5 | #
 6 | # Usage: ./test_schemes          (from the repository root; the commands
 7 | #                                 below use ./-relative paths)
 8 | #
 9 | # The second argument of each command is 49: the argument parsing in
10 | # test_gpu2_versions reads it as <maxversion>, the one in test_gpu2_seeds
11 | # reads it as <version>.
12 | 
13 | ./test_gpu2_versions Pong2PlayerVS 49
14 | ./test_gpu2_seeds Pong2Player075p 49
15 | ./test_gpu2_seeds Pong2Player05p 49
16 | ./test_gpu2_seeds Pong2Player025p 49
17 | ./test_gpu2_seeds Pong2Player0 49
18 | ./test_gpu2_seeds Pong2Player05 49
19 | ./test_gpu2_seeds Pong2Player025 49
20 | ./test_gpu2_seeds Pong2Player075 49
21 | ./test_gpu2_versions Pong2Player 49
22 | 


--------------------------------------------------------------------------------