├── .gitignore ├── README.md ├── dqn ├── LICENSE ├── NeuralQLearner.lua ├── NeuralQLearnerB.lua ├── Rectifier.lua ├── Scale.lua ├── TransitionTable.lua ├── convnet.lua ├── convnet_atari3.lua ├── extract_data.lua ├── initenv.lua ├── msleep.c ├── net_downsample_2x_full_y.lua ├── nnutils.lua ├── plot_results.lua ├── test_2agent.lua ├── test_agent.lua ├── train_2agent.lua └── train_agent.lua ├── extract_data ├── install_dependencies.sh ├── plot_2results ├── plot_results ├── plots ├── Pong2Player.csv ├── Pong2Player0.csv ├── Pong2Player025.csv ├── Pong2Player025_average_reward.png ├── Pong2Player025_episode_counts.png ├── Pong2Player025_history_A.csv ├── Pong2Player025_history_B.csv ├── Pong2Player025_meanq.png ├── Pong2Player025_reward_counts.png ├── Pong2Player025_tderror.png ├── Pong2Player025_time.png ├── Pong2Player025p.csv ├── Pong2Player025p_average_reward.png ├── Pong2Player025p_episode_counts.png ├── Pong2Player025p_history_A.csv ├── Pong2Player025p_history_B.csv ├── Pong2Player025p_meanq.png ├── Pong2Player025p_reward_counts.png ├── Pong2Player025p_tderror.png ├── Pong2Player025p_time.png ├── Pong2Player05.csv ├── Pong2Player05_average_reward.png ├── Pong2Player05_episode_counts.png ├── Pong2Player05_history_A.csv ├── Pong2Player05_history_B.csv ├── Pong2Player05_meanq.png ├── Pong2Player05_reward_counts.png ├── Pong2Player05_tderror.png ├── Pong2Player05_time.png ├── Pong2Player05p.csv ├── Pong2Player05p_average_reward.png ├── Pong2Player05p_episode_counts.png ├── Pong2Player05p_history_A.csv ├── Pong2Player05p_history_B.csv ├── Pong2Player05p_meanq.png ├── Pong2Player05p_reward_counts.png ├── Pong2Player05p_tderror.png ├── Pong2Player05p_time.png ├── Pong2Player075.csv ├── Pong2Player075_average_reward.png ├── Pong2Player075_episode_counts.png ├── Pong2Player075_history_A.csv ├── Pong2Player075_history_B.csv ├── Pong2Player075_meanq.png ├── Pong2Player075_reward_counts.png ├── Pong2Player075_tderror.png ├── Pong2Player075_time.png ├── Pong2Player075p.csv ├── 
Pong2Player075p_average_reward.png ├── Pong2Player075p_episode_counts.png ├── Pong2Player075p_history_A.csv ├── Pong2Player075p_history_B.csv ├── Pong2Player075p_meanq.png ├── Pong2Player075p_reward_counts.png ├── Pong2Player075p_tderror.png ├── Pong2Player075p_time.png ├── Pong2Player0_average_reward.png ├── Pong2Player0_episode_counts.png ├── Pong2Player0_history_A.csv ├── Pong2Player0_history_B.csv ├── Pong2Player0_meanq.png ├── Pong2Player0_reward_counts.png ├── Pong2Player0_tderror.png ├── Pong2Player0_time.png ├── Pong2PlayerVS.csv ├── Pong2PlayerVS_average_reward.png ├── Pong2PlayerVS_episode_counts.png ├── Pong2PlayerVS_history_A.csv ├── Pong2PlayerVS_history_B.csv ├── Pong2PlayerVS_meanq.png ├── Pong2PlayerVS_reward_counts.png ├── Pong2PlayerVS_tderror.png ├── Pong2PlayerVS_time.png ├── Pong2Player_average_reward.png ├── Pong2Player_episode_counts.png ├── Pong2Player_history_A.csv ├── Pong2Player_history_B.csv ├── Pong2Player_meanq.png ├── Pong2Player_reward_counts.png ├── Pong2Player_tderror.png ├── Pong2Player_time.png ├── plot.py ├── plot_history.py ├── scatter.py ├── serving_time_history.png ├── serving_time_history_competitive.png ├── serving_time_history_cooperative.png ├── serving_time_per_point.png ├── serving_time_per_point_scatter.png ├── sidebounces_history.png ├── sidebounces_history_competitive.png ├── sidebounces_history_cooperative.png ├── sidebounces_per_point.png ├── sidebounces_per_point_scatter.png ├── stats.tex ├── table.py ├── wallbounces_history.png ├── wallbounces_history_competitive.png ├── wallbounces_history_cooperative.png ├── wallbounces_per_sidebounce.png └── wallbounces_per_sidebounce_scatter.png ├── roms ├── Pong2Player.bin ├── Pong2Player0.bin ├── Pong2Player025.bin ├── Pong2Player025p.bin ├── Pong2Player05.bin ├── Pong2Player05p.bin ├── Pong2Player075.bin ├── Pong2Player075p.bin ├── Pong2PlayerVS.bin ├── README ├── breakout.bin ├── breakout_2player.bin ├── pong.bin ├── surround.bin ├── wizard_of_wor.bin └── 
wizard_of_wor_2player.bin ├── run_cpu ├── run_gpu ├── run_gpu2 ├── run_gpu2_resume ├── test_cpu ├── test_gpu ├── test_gpu2 ├── test_gpu2_seeds ├── test_gpu2_versions └── test_schemes /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | torch 3 | dqn/*.t7 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepMind Atari Deep Q Learner for 2 players 2 | 3 | This repository hosts the code to reproduce the experiments in the article "Multiagent Cooperation and Competition with Deep 4 | Reinforcement Learning". It is based on DeepMind's [original code](https://sites.google.com/a/deepmind.com/dqn/), that was modified to support two players. **NB!** Currently only Pong game in two-player mode is supported, support for other games and one-player mode is untested. 5 | 6 | Gameplay videos can be found here: https://www.youtube.com/playlist?list=PLfLv_F3r0TwyaZPe50OOUx8tRf0HwdR_u 7 | 8 | Installation instructions 9 | ------------------------- 10 | 11 | The installation requires Linux with apt-get. 12 | 13 | Note: In order to run the GPU version of DQN, you should additionally have the 14 | NVIDIA® CUDA® (version 5.5 or later) toolkit installed prior to the Torch 15 | installation below. 
16 | This can be downloaded from https://developer.nvidia.com/cuda-toolkit 17 | and installation instructions can be found in 18 | http://docs.nvidia.com/cuda/cuda-getting-started-guide-for-linux 19 | 20 | To train DQN on Atari games, the following components must be installed: 21 | * LuaJIT and Torch 7.0 22 | * nngraph 23 | * Xitari (fork of the Arcade Learning Environment (Bellemare et al., 2013)) 24 | * AleWrap (a lua interface to Xitari) 25 | 26 | To install all of the above in a subdirectory called 'torch', it should be enough to run 27 | 28 | ./install_dependencies.sh 29 | 30 | from the base directory of the package. 31 | 32 | 33 | Note: The above install script will install the following packages via apt-get: 34 | build-essential, gcc, g++, cmake, curl, libreadline-dev, git-core, libjpeg-dev, 35 | libpng-dev, ncurses-dev, imagemagick, unzip, libqt4-dev. 36 | 37 | In addition, the following Lua components are installed to the 'torch' subdirectory: 38 | luajit-rocks, cwrap, paths, torch, nn, cutorch, cunn, luafilesystem, penlight, sys, 39 | xlua, image, env, qtlua, qttorch, nngraph, lua-gd. 40 | 41 | Training 42 | -------- 43 | 44 | To run training for a game: 45 | 46 | ./run_gpu2 47 | 48 | The following games are supported: 49 | * `Pong2Player` - cooperative game (\rho = -1) 50 | * `Pong2Player075` - transition (\rho = -0.75) 51 | * `Pong2Player05` - transition (\rho = -0.5) 52 | * `Pong2Player025` - transition (\rho = -0.25) 53 | * `Pong2Player0` - transition (\rho = 0) 54 | * `Pong2Player025p` - transition (\rho = 0.25) 55 | * `Pong2Player05p` - transition (\rho = 0.5) 56 | * `Pong2Player075p` - transition (\rho = 0.75) 57 | * `Pong2PlayerVS` - competitive game (\rho = 1) 58 | 59 | During training, snapshots of the networks of both agents are written to the `dqn/` folder. These are named `DQN3_0_1_<game>_FULL_Y_A_<epoch>.t7` and `DQN3_0_1_<game>_FULL_Y_B_<epoch>.t7`. One epoch is defined as 250,000 steps, and epochs are numbered starting from 0. 
**NB!** One epoch snapshot takes about 1GB, so for 50 epochs reserve 50GB of free space. 60 | 61 | Testing 62 | ------- 63 | 64 | To run testing for one episode: 65 | 66 | ./test_gpu2 67 | 68 | To run testing with different seeds (by default 10): 69 | 70 | ./test_gpu2_seeds 71 | 72 | To run testing with different seeds (by default 10), for all epochs (default 49): 73 | 74 | ./test_gpu2_versions 75 | 76 | To run all experiments at once: 77 | 78 | ./test_schemes 79 | 80 | All these scripts write the file `dqn/<game>.csv`, which contains the following game statistics: 81 | * *Epoch* - epoch number, 82 | * *Seed* - seed used for this run, 83 | * *WallBounces* - total number of wall-bounces in this run, 84 | * *SideBounce* - total number of paddle-bounces in this run, 85 | * *Points* - total number of points (lost balls) in this run, 86 | * *ServingTime* - total serving time in this run, 87 | * *RewardA* - total reward of player A, 88 | * *RewardB* - total reward of player B. 89 | 90 | **NB!** All scripts append to this file, so after several runs you might want to delete irrelevant lines. 91 | 92 | Extracting training statistics 93 | ---------------------------- 94 | 95 | To plot training history: 96 | 97 | ./plot_2results <game> <epoch> 98 | 99 | The following plots are shown for both agents: 100 | * average reward per game during testing, 101 | * total count of non-zero rewards during testing, 102 | * number of games played during testing, 103 | * average Q-value of validation set. 104 | 105 | To extract training statistics to file: 106 | 107 | ./extract_data 108 | 109 | This produces the files `dqn/<game>_history_A.csv` and `dqn/<game>_history_B.csv`. 
These files contain the following columns: 110 | * *Epoch* - testing phase number, divide by 2 to get true epoch, 111 | * *Average reward* - average reward per game during testing, 112 | * *Reward count* - total count of non-zero rewards during testing, 113 | * *Episode count* - number of games played during testing, 114 | * *MeanQ* - average Q-value of validation set, 115 | * *TD Error* - temporal difference error, 116 | * *Seconds* - seconds since start. 117 | 118 | Plotting game statistics 119 | ------------------------ 120 | 121 | Plotting scripts are in folder `plots`. All `.csv` files from `dqn/` folder should be moved there for plotting. 122 | 123 | * `scatter.py` - plots for figure 7, uses `.csv` files, 124 | * `plot.py` - plots for figures 3 and 4, uses `Pong2Player.csv` and `Pong2PlayerVS.csv` files, 125 | * `plot_history.py` - plots for figure 8, uses `<game>_history_A.csv` and `<game>_history_B.csv` files. 126 | 127 | **NB!** Be sure to clean up `.csv` files as explained above. 128 | -------------------------------------------------------------------------------- /dqn/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | LIMITED LICENSE: 3 | 4 | Copyright (c) 2014 Google Inc. 5 | Limited License: Under no circumstance is commercial use, reproduction, or 6 | distribution permitted. Use, reproduction, and distribution are permitted 7 | solely for academic use in evaluating and reviewing claims made in 8 | "Human-level control through deep reinforcement learning", Nature 518, 529–533 9 | (26 February 2015) doi:10.1038/nature14236, provided that the following 10 | conditions are met: 11 | 12 | * Any reproduction or distribution of source code must retain the above 13 | copyright notice and the full text of this license including the following 14 | disclaimer.
 15 | 16 | * Any reproduction or distribution in binary form must reproduce the above 17 | copyright notice and the full text of this license including the following 18 | disclaimer
 in the documentation and/or other materials provided with the 19 | distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /dqn/Rectifier.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Copyright (c) 2014 Google Inc. 3 | 4 | See LICENSE file for full terms of limited license. 5 | ]] 6 | 7 | --[[ Rectified Linear Unit. 8 | 9 | The output is max(0, input). 10 | --]] 11 | 12 | local Rectifier, parent = torch.class('nn.Rectifier', 'nn.Module') 13 | 14 | -- This module accepts minibatches 15 | function Rectifier:updateOutput(input) 16 | return self.output:resizeAs(input):copy(input):abs():add(input):div(2) 17 | end 18 | 19 | function Rectifier:updateGradInput(input, gradOutput) 20 | self.gradInput:resizeAs(self.output) 21 | return self.gradInput:sign(self.output):cmul(gradOutput) 22 | end -------------------------------------------------------------------------------- /dqn/Scale.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Copyright (c) 2014 Google Inc. 3 | 4 | See LICENSE file for full terms of limited license. 5 | ]] 6 | 7 | require "nn" 8 | require "image" 9 | 10 | local scale = torch.class('nn.Scale', 'nn.Module') 11 | 12 | 13 | function scale:__init(height, width) 14 | self.height = height 15 | self.width = width 16 | end 17 | 18 | function scale:forward(x) 19 | local x = x 20 | if x:dim() > 3 then 21 | x = x[1] 22 | end 23 | 24 | x = image.rgb2y(x) 25 | x = image.scale(x, self.width, self.height, 'bilinear') 26 | return x 27 | end 28 | 29 | function scale:updateOutput(input) 30 | return self:forward(input) 31 | end 32 | 33 | function scale:float() 34 | end 35 | -------------------------------------------------------------------------------- /dqn/convnet.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Copyright (c) 2014 Google Inc. 
require "initenv"

--[[ Build the convolutional Q-network described by `args`.

Fields read from `args`:
  input_dims                        dimensions passed to the initial nn.Reshape
  hist_len, ncols                   their product is the first layer's input plane count
  n_units, filter_size, filter_stride   per-convolution-layer settings
  n_hid                             sizes of the fully connected hidden layers
  nl                                constructor of the non-linearity module
  n_actions                         size of the output layer
  gpu                               >= 0 moves the network to CUDA
  verbose                           >= 2 prints the flattened feature size

Returns the assembled nn.Sequential network.
]]
function create_network(args)
    local conv = nn.SpatialConvolution
    local net = nn.Sequential()
    net:add(nn.Reshape(unpack(args.input_dims)))

    -- First convolution; it is the only one given the extra trailing argument 1.
    local in_planes = args.hist_len * args.ncols
    net:add(conv(in_planes, args.n_units[1],
                 args.filter_size[1], args.filter_size[1],
                 args.filter_stride[1], args.filter_stride[1], 1))
    net:add(args.nl())

    -- Remaining convolutions, each followed by the non-linearity.
    for layer = 2, #args.n_units do
        local size = args.filter_size[layer]
        local stride = args.filter_stride[layer]
        net:add(conv(args.n_units[layer - 1], args.n_units[layer],
                     size, size, stride, stride))
        net:add(args.nl())
    end

    -- Push one all-zero example through the convolutional stack to find out
    -- how many elements it produces.
    local probe = torch.zeros(1, unpack(args.input_dims))
    local nel
    if args.gpu >= 0 then
        nel = net:cuda():forward(probe:cuda()):nElement()
    else
        nel = net:forward(probe):nElement()
    end

    -- Flatten all feature planes into one vector per example.
    net:add(nn.Reshape(nel))

    -- First fully connected layer.
    local last_layer_size = args.n_hid[1]
    net:add(nn.Linear(nel, last_layer_size))
    net:add(args.nl())

    -- Any further fully connected hidden layers.
    for layer = 2, #args.n_hid do
        last_layer_size = args.n_hid[layer]
        net:add(nn.Linear(args.n_hid[layer - 1], last_layer_size))
        net:add(args.nl())
    end

    -- Output layer: one unit per action.
    net:add(nn.Linear(last_layer_size, args.n_actions))

    if args.gpu >= 0 then
        net:cuda()
    end
    if args.verbose >= 2 then
        --print(net)
        print('Convolutional layers flattened output size:', nel)
    end
    return net
end
-- dqn/extract_data.lua
--
-- Prints the training history stored in a saved agent snapshot as CSV on
-- standard output, one row per entry of `v_history`.
--
-- Usage: luajit extract_data.lua <snapshot.t7>

require 'nn'
require 'initenv'
require 'cutorch'

if #arg < 1 then
  -- Name the expected argument so the message is actionable.
  print('Usage: ', arg[0], ' <snapshot.t7>')
  return
end

-- Kept local: nothing outside this script needs the loaded snapshot.
local data = torch.load(arg[1])

-- History tables, in the same order as the CSV header columns after "Epoch".
local columns = {
  'reward_history', 'reward_counts', 'episode_counts',
  'v_history', 'td_history', 'time_history',
}

print("Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds")
for i = 1, #data.v_history do
  local row = {i}
  for _, key in ipairs(columns) do
    local history = data[key]
    local value = history and history[i]
    -- A missing entry becomes an empty field; a nil inside the row would make
    -- table.concat raise or silently drop the trailing column.
    if value == nil then value = '' end
    row[#row + 1] = value
  end
  print(table.concat(row, ','))
end
4 | * Your computer may use something besides -fPIC 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | static int msleep_c(lua_State *L){ 13 | long msecs = lua_tointeger(L, -1); 14 | usleep(1000*msecs); 15 | return 0; /* No items returned */ 16 | } 17 | 18 | /* Can't name this sleep(), it conflicts with sleep() in unistd.h */ 19 | static int sleep_c(lua_State *L){ 20 | long secs = lua_tointeger(L, -1); 21 | sleep(secs); 22 | return 0; /* No items returned */ 23 | } 24 | 25 | /* Register both functions */ 26 | int luaopen_msleep(lua_State *L){ 27 | lua_register( L, "msleep", msleep_c); 28 | lua_register(L, "sleep", sleep_c); 29 | return 0; 30 | } 31 | 32 | -------------------------------------------------------------------------------- /dqn/net_downsample_2x_full_y.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Copyright (c) 2014 Google Inc. 3 | 4 | See LICENSE file for full terms of limited license. 5 | ]] 6 | 7 | require "image" 8 | require "Scale" 9 | 10 | local function create_network(args) 11 | -- Y (luminance) 12 | return nn.Scale(84, 84, true) 13 | end 14 | 15 | return create_network 16 | -------------------------------------------------------------------------------- /dqn/nnutils.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Copyright (c) 2014 Google Inc. 3 | 4 | See LICENSE file for full terms of limited license. 5 | ]] 6 | 7 | require "torch" 8 | 9 | function recursive_map(module, field, func) 10 | local str = "" 11 | if module[field] or module.modules then 12 | str = str .. torch.typename(module) .. ": " 13 | end 14 | if module[field] then 15 | str = str .. func(module[field]) 16 | end 17 | if module.modules then 18 | str = str .. "[" 19 | for i, submodule in ipairs(module.modules) do 20 | local submodule_str = recursive_map(submodule, field, func) 21 | str = str .. 
-- dqn/nnutils.lua

-- Walk `module` and its nested `modules` list, building a string of the form
-- "<typename>: <func(module[field])>[<child> <child> ...]".
-- A module that has neither `field` nor `modules` contributes the empty string.
-- Children are separated by a single space, emitted after every non-empty
-- child except the last one in the list.
function recursive_map(module, field, func)
    local value = module[field]
    local children = module.modules

    if not value and not children then
        return ""
    end

    local pieces = { torch.typename(module) .. ": " }

    if value then
        pieces[#pieces + 1] = "" .. func(value)
    end

    if children then
        local count = #children
        pieces[#pieces + 1] = "["
        for idx, child in ipairs(children) do
            local child_str = recursive_map(child, field, func)
            pieces[#pieces + 1] = child_str
            if idx < count and #child_str > 0 then
                pieces[#pieces + 1] = " "
            end
        end
        pieces[#pieces + 1] = "]"
    end

    return table.concat(pieces)
end

-- Mean of the absolute values of `w`, computed on a float copy.
function abs_mean(w)
    local copy = w:clone():float()
    return torch.mean(torch.abs(copy))
end

-- Largest absolute value in `w`, computed on a float copy.
function abs_max(w)
    local copy = w:clone():float()
    return torch.abs(copy):max()
end

-- Build a string of average and maximum absolute weight values for the
-- modules in the given network.
function get_weight_norms(module)
    local means = recursive_map(module, "weight", abs_mean)
    local maxima = recursive_map(module, "weight", abs_max)
    return "\n\nWeight norms:\n" .. means .. "\nWeight max:\n" .. maxima
end

-- Build a string of average and maximum absolute weight-gradient values for
-- the modules in the given network.
function get_grad_norms(module)
    local means = recursive_map(module, "gradWeight", abs_mean)
    local maxima = recursive_map(module, "gradWeight", abs_max)
    return "Weight grad norms:\n" .. means .. "\nWeight grad max:\n" .. maxima
end
-- dqn/plot_results.lua
--
-- Opens one gnuplot figure per training-history series stored in a saved
-- agent snapshot.
--
-- Usage: luajit plot_results.lua <snapshot.t7> [agent label]
--   arg[1]  path of the snapshot passed to torch.load
--   arg[2]  optional label appended to every plot title as " for agent <label>"

require 'nn'
require 'initenv'
require 'cutorch'
require 'gnuplot'

if #arg < 1 then
  print('Usage: ', arg[0], ' <snapshot.t7> [agent label]')
  return
end

-- Title suffix. Previously the label was glued on without a space
-- ("for agent1") and was dropped unless exactly two arguments were given.
local agent_suffix = ''
if arg[2] then
  agent_suffix = ' for agent ' .. arg[2]
end

local data = torch.load(arg[1])

--gnuplot.raw('set multiplot layout 2, 3')

-- { plot title, history field of the snapshot }
local plots = {
  {'Average reward per game during testing', 'reward_history'},
  {'Total count of rewards during testing', 'reward_counts'},
  {'Number of games played during testing', 'episode_counts'},
  {'Average Q-value of validation set', 'v_history'},
  -- Disabled in the original script; uncomment to plot them as well:
  --{'TD error (old and new Q-value difference) of validation set', 'td_history'},
  --{'Seconds elapsed after epoch', 'time_history'},
  --{'Qmax history', 'qmax_history'},
}

for _, plot in ipairs(plots) do
  gnuplot.figure()
  gnuplot.title(plot[1] .. agent_suffix)
  gnuplot.plot(torch.Tensor(data[plot[2]]))
end
5 | ]] 6 | 7 | gd = require "gd" 8 | require "math" 9 | if not dqn then 10 | require "initenv" 11 | end 12 | 13 | local cmd = torch.CmdLine() 14 | cmd:text() 15 | cmd:text('TrainAgent in Environment:') 16 | cmd:text() 17 | cmd:text('Options:') 18 | 19 | cmd:option('-framework', '', 'name of training framework') 20 | cmd:option('-env', '', 'name of envirment to use') 21 | cmd:option('-game_path', '', 'path to environment file (ROM)') 22 | cmd:option('-env_params', '', 'string of environment parameters') 23 | cmd:option('-pool_frms', '', 24 | 'string of frame pooling parameters (e.g.: size=2,type="max")') 25 | cmd:option('-actrep', 1, 'how many times to repeat action') 26 | cmd:option('-random_starts', 0, 'play action 0 between 1 and random_starts ' .. 27 | 'number of times at the start of each training episode') 28 | 29 | cmd:option('-name', '', 'filename used for saving network and training history for agent 1') 30 | cmd:option('-nameB', '', 'filename used for saving network and training history for agent 2') 31 | cmd:option('-network', '', 'reload pretrained network for agent 1') 32 | cmd:option('-networkB', '', 'reload pretrained network for agent 2') 33 | cmd:option('-agent', '', 'name of agent file to use') 34 | cmd:option('-agent_params', '', 'string of agent parameters') 35 | cmd:option('-seed', 3, 'fixed input seed for repeatable experiments') 36 | 37 | cmd:option('-verbose', 2, 38 | 'the higher the level, the more information is printed to screen') 39 | cmd:option('-threads', 1, 'number of BLAS threads') 40 | cmd:option('-gpu', -1, 'gpu flag') 41 | cmd:option('-gif_file', '', 'GIF path to write session screens') 42 | cmd:option('-csv_file', '', 'CSV path to write session data') 43 | cmd:option('-version', '', 'epoch of training') 44 | cmd:option('-datas_file', '', 'CSV path to write learning evaluation data') 45 | cmd:text() 46 | 47 | local opt = cmd:parse(arg) 48 | local clock = os.clock 49 | --- General setup. 
50 | local game_env, game_actions,game_actionsB, agent,agentB, opt,optB = setup2(opt) 51 | 52 | -- override print to always flush the output 53 | local old_print = print 54 | local print = function(...) 55 | old_print(...) 56 | io.flush() 57 | end 58 | 59 | local version=opt.version 60 | -- file names from command line 61 | local gif_filename = opt.gif_file 62 | local csv_filename = opt.csv_file 63 | local datas_filename=opt.datas_file 64 | print(gif_filename, csv_filename, datas_filename) 65 | 66 | -- start a new game 67 | local screen, rewardA,rewardB, terminal = game_env:newGame2() 68 | 69 | -- compress screen to JPEG with 100% quality 70 | local jpg = image.compressJPG(screen:squeeze(), 100) 71 | -- create gd image from JPEG string 72 | local im = gd.createFromJpegStr(jpg:storage():string()) 73 | -- convert truecolor to palette 74 | im:trueColorToPalette(false, 256) 75 | 76 | -- write GIF header, use global palette and infinite looping 77 | im:gifAnimBegin(gif_filename, true, 0) 78 | -- write first frame 79 | im:gifAnimAdd(gif_filename, false, 0, 0, 7, gd.DISPOSAL_NONE) 80 | 81 | -- remember the image and show it first 82 | local previm = im 83 | local win = image.display({image=screen}) 84 | 85 | -- open CSV file for writing and write header 86 | local csv_file = assert(io.open(csv_filename, "w")) 87 | csv_file:write('actionA;ActionB;max_qvalueA;max_qvalueB;rewardA;rewardB;terminal\n') 88 | local datas_file = assert(io.open(datas_filename, "a+")) 89 | if opt.seed==1 then datas_file:write('training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB\n') end 90 | print("Started playing...") 91 | previousScore=0 92 | totalSideBounce=0 93 | previousWallBounce=false 94 | totalWallBounce=0 95 | previousSideBounce=0 96 | servingTime=0 97 | totalRewardA = 0 98 | totalRewardB = 0 99 | -- play one episode (game) 100 | while not terminal do 101 | -- if action was chosen randomly, Q-value is 0 102 | agent.bestq = 0 103 | agentB.bestq = 0 104 | 105 | -- 
choose the best action 106 | local action_index = agent:perceive(rewardA, screen, terminal, true, 0.01) 107 | local action_indexB = agentB:perceive(rewardB, screen, terminal, true, 0.01) 108 | --if agent.bestq == 0 then 109 | -- print("A random action: " .. action_index) 110 | --else 111 | -- print("A agent action: " .. action_index) 112 | --end 113 | -- play game in test mode (episodes don't end when losing a life) 114 | screen, rewardA,rewardB, terminal, sideBouncing,wallBouncing,points,crash,serving = game_env:step2(game_actions[action_index],game_actionsB[action_indexB], false) 115 | if crash then 116 | print("CRASHED!!!") 117 | break 118 | end 119 | if rewardA ~= 0 or rewardB ~= 0 then 120 | print(rewardA, rewardB, points) 121 | end 122 | totalRewardA = totalRewardA + rewardA 123 | totalRewardB = totalRewardB + rewardB 124 | --gather statisticts for one ball 125 | -- wallbouncing true when the ball is touching the wall, but we want to count only when it turn true 126 | if (wallBouncing==true and previousWallBounce==false) then 127 | totalWallBounce=totalWallBounce+1 128 | end 129 | previousWallBounce=wallBouncing 130 | 131 | if (previousSideBounce${1}_history_A.csv 20 | ../torch/bin/luajit extract_data.lua $agentb_file >${1}_history_B.csv 21 | 22 | -------------------------------------------------------------------------------- /install_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ###################################################################### 4 | # Torch install 5 | ###################################################################### 6 | 7 | 8 | TOPDIR=$PWD 9 | 10 | # Prefix: 11 | PREFIX=$PWD/torch 12 | echo "Installing Torch into: $PREFIX" 13 | 14 | if [[ `uname` != 'Linux' ]]; then 15 | echo 'Platform unsupported, only available for Linux' 16 | exit 17 | fi 18 | if [[ `which apt-get` == '' ]]; then 19 | echo 'apt-get not found, platform not supported' 20 | exit 21 
#!/usr/bin/env bash

######################################################################
# Torch install
######################################################################
#
# Installs the apt packages, LuaJIT/Torch7 and its rocks, nngraph, the
# two-player Xitari and Alewrap forks, and Lua-GD into "$PWD/torch".
# Run it from the base directory of the package.

TOPDIR=$PWD

# Prefix:
PREFIX=$PWD/torch
echo "Installing Torch into: $PREFIX"

# Exit with the given status (printing the usual message) when it is non-zero.
check_status() {
    local ret=$1
    if [ "$ret" -ne 0 ]; then
        echo "Error. Exiting."
        exit "$ret"
    fi
}

# cd into a directory or abort. The script runs `rm -rf` right after several
# directory changes, so it must never continue from the wrong directory.
enter_dir() {
    cd "$1" || { echo "Error: cannot enter directory $1. Exiting."; exit 1; }
}

# Unsupported platforms are reported with a non-zero exit status.
if [[ $(uname) != 'Linux' ]]; then
    echo 'Platform unsupported, only available for Linux'
    exit 1
fi
if ! command -v apt-get >/dev/null 2>&1; then
    echo 'apt-get not found, platform not supported'
    exit 1
fi

# Install dependencies for Torch (one apt-get call per entry, as before, so a
# single failing package does not prevent the others from being installed):
sudo apt-get update
sudo apt-get install -qqy build-essential
sudo apt-get install -qqy gcc g++
for pkg in cmake curl libreadline-dev git-core libjpeg-dev libpng-dev \
           ncurses-dev imagemagick unzip libqt4-dev; do
    sudo apt-get install -qqy "$pkg"
done
sudo apt-get update

echo "==> Torch7's dependencies have been installed"

# Build and install Torch7
mkdir -p "$PREFIX/src"
enter_dir "$PREFIX/src"
rm -rf luajit-rocks
git clone https://github.com/torch/luajit-rocks.git
enter_dir luajit-rocks
mkdir -p build
enter_dir build
git checkout master; git pull
rm -f CMakeCache.txt
cmake .. -DCMAKE_INSTALL_PREFIX="$PREFIX" -DCMAKE_BUILD_TYPE=Release
check_status $?
make
check_status $?
make install
check_status $?

# The CUDA rocks are installed only when an executable nvcc is found.
path_to_nvcc=$(command -v nvcc)
if [ -x "$path_to_nvcc" ]
then
    cutorch=ok
    cunn=ok
fi

# Install base packages (best effort: failures here do not abort the script):
for rock in cwrap paths torch nn; do
    "$PREFIX/bin/luarocks" install "$rock"
done

if [ -n "$cutorch" ]; then
    "$PREFIX/bin/luarocks" install cutorch
fi
if [ -n "$cunn" ]; then
    "$PREFIX/bin/luarocks" install cunn
fi

for rock in luafilesystem penlight sys xlua image env qtlua qttorch luagd; do
    "$PREFIX/bin/luarocks" install "$rock"
done

echo ""
echo "=> Torch7 has been installed successfully"
echo ""

echo "Installing nngraph ... "
"$PREFIX/bin/luarocks" install nngraph
check_status $?
echo "nngraph installation completed"

echo "Installing Xitari ... "
enter_dir "$PREFIX/src"
rm -rf xitari
git clone https://github.com/NeuroCSUT/Xitari2Player.git xitari
enter_dir xitari
"$PREFIX/bin/luarocks" make
check_status $?
echo "Xitari installation completed"

echo "Installing Alewrap ... "
enter_dir "$PREFIX/src"
rm -rf alewrap
git clone https://github.com/NeuroCSUT/Alewrap2Player.git alewrap
enter_dir alewrap
"$PREFIX/bin/luarocks" make
check_status $?
echo "Alewrap installation completed"

echo "Installing Lua-GD ... "
enter_dir "$PREFIX/src"
rm -rf lua-gd
git clone https://github.com/ittner/lua-gd.git
enter_dir lua-gd
# Point the Lua-GD Makefile at the LuaJIT binary and headers under $PREFIX.
sed -i 's/LUABIN=lua5.1/LUABIN=..\/..\/bin\/luajit/' Makefile
sed -i 's/`pkg-config \$(LUAPKG) --cflags`/-I..\/..\/include/' Makefile
"$PREFIX/bin/luarocks" make
check_status $?
echo "Lua-GD installation completed"

echo
echo "You can run experiments by executing: "
echo
echo "   ./run_cpu game_name"
echo
echo "            or   "
echo
echo "   ./run_gpu game_name"
echo
echo "For this you need to provide the rom files of the respective games (game_name.bin) in the roms/ directory"
echo
#!/bin/bash
#
# plot_2results: plot the training history of both agents of a two-player run.
#
# Usage: ./plot_2results <game> <epoch>
#   <game>   game name used in the snapshot file names
#   <epoch>  snapshot suffix; both arguments are required
#
# Runs dqn/plot_results.lua on DQN3_0_1_<game>_FULL_Y_A_<epoch>.t7 (labelled
# agent 1) and DQN3_0_1_<game>_FULL_Y_B_<epoch>.t7 (labelled agent 2).

if [ -z "$1" ] || [ -z "$2" ]; then
    # Both arguments are mandatory, so say so, and report failure to callers.
    echo "Please provide the name of the game and the epoch, e.g.  $0 breakout 49"
    exit 1
fi

agent_type="DQN3_0_1"
agent_name="${agent_type}_${1}_FULL_Y_A_${2}"
agent_nameB="${agent_type}_${1}_FULL_Y_B_${2}"
network_file="$agent_name.t7"
network_fileB="$agent_nameB.t7"

# The snapshot paths are relative to dqn/, so stop if it cannot be entered.
cd dqn || exit 1
../torch/bin/luajit plot_results.lua "$network_file" '1'
../torch/bin/luajit plot_results.lua "$network_fileB" '2'
$0 breakout [version]"; exit 0 5 | fi 6 | 7 | agent_type="DQN3_0_1" 8 | 9 | agent_name=$agent_type"_"$1"_FULL_Y" 10 | 11 | if [ "$2" ] 12 | then agent_name=$agent_name"_"$2 13 | fi 14 | 15 | # File name handed to plot_results.lua; uses the same "<agent_name>.t7" convention as plot_2results. 16 | network_file="$agent_name.t7" 17 | 18 | cd dqn 19 | ../torch/bin/luajit plot_results.lua "$network_file" 20 | 21 | -------------------------------------------------------------------------------- /plots/Pong2Player0.csv: -------------------------------------------------------------------------------- 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB 2 | 49;1;113;144;31;40596;-10;-21 3 | 49;2;83;110;26;54180;-16;-10 4 | 49;3;123;150;33;50904;-13;-20 5 | 49;4;87;118;28;53392;-14;-14 6 | 49;5;86;119;32;41276;-11;-21 7 | 49;6;68;95;22;55760;-11;-11 8 | 49;7;107;131;30;39684;-9;-21 9 | 49;8;103;138;31;41120;-10;-21 10 | 49;9;119;134;30;50664;-9;-21 11 | 49;10;106;132;32;41612;-11;-21 12 | -------------------------------------------------------------------------------- /plots/Pong2Player025.csv: -------------------------------------------------------------------------------- 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB 2 | 49;1;100;178;35;38308;-24,5;-19,25 3 | 49;2;84;126;28;52024;-21,25;-13,75 4 | 49;3;83;151;30;50204;-21;-16,5 5 | 49;4;87;153;27;51320;-18,75;-15 6 | 49;5;117;169;32;47916;-23,75;-16,25 7 | 49;6;63;103;21;54852;-15,75;-10,5 8 | 49;7;76;150;28;50944;-17,5;-17,5 9 | 49;8;88;158;29;49432;-18,5;-17,75 10 | 49;9;81;133;23;52328;-17,75;-11 11 | 49;10;101;161;32;49024;-21,5;-18,5 12 | -------------------------------------------------------------------------------- /plots/Pong2Player025_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_average_reward.png -------------------------------------------------------------------------------- 
/plots/Pong2Player025_episode_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player025_history_A.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-16.175,568,20,-0.039221713870764,0.029145060969517,0 3 | 2,-5.25,1808,86,-0.08608803910017,0.024464469596744,2145.1178991795 4 | 3,-7.89,595,25,-0.16516388082504,0.036043447315693,5047.8407013416 5 | 4,-17.1375,563,20,-0.21209482467175,0.024275738954544,7975.1648442745 6 | 5,-13.725,559,20,-0.25724805343151,0.023471528768539,10931.119163036 7 | 6,-18.09375,882,32,-0.28720419490337,0.039458050966263,13900.719771862 8 | 7,-5.25,1807,86,-0.29875303316116,0.035284322023392,16899.678385973 9 | 8,-5.9309210526316,1643,76,-0.30827509558201,0.027181980490685,19909.909010172 10 | 9,-17.652173913043,554,23,-0.34414884364605,0.023285815060139,22939.618853092 11 | 10,-24.25,493,14,-0.45101404255629,0.026651228666306,25973.788203239 12 | 11,-17.8,566,20,-0.52390958654881,0.029277494549751,29014.436220407 13 | 12,-18.814814814815,729,27,-0.54987085461617,0.039004881381989,32054.2854774 14 | 13,-22.7625,569,20,-0.54759183239937,0.037574471831322,35099.126308441 15 | 14,-21.826086956522,573,23,-0.5414869966507,0.031009385406971,38136.861510515 16 | 15,-23.363636363636,687,22,-0.53737711763382,0.029484157204628,41182.874017715 17 | 16,-21.93,647,25,-0.55663867473602,0.028866673767567,44221.490203857 18 | 17,-20.539473684211,672,19,-0.55051146292686,0.029257072925568,47265.278861761 19 | 18,-21.64,597,25,-0.5600018157959,0.028754609167576,50302.363017797 20 | 
19,-22.836538461538,755,26,-0.528419293046,0.032014802247286,53345.915400743 21 | 20,-19.066666666667,500,15,-0.55494042682648,0.039873561859131,56382.528666735 22 | 21,-23.796875,554,16,-0.60025869548321,0.029821652859449,59421.726590633 23 | 22,-21.967391304348,581,23,-0.60584501338005,0.032700836449862,62458.533436537 24 | 23,-22.736111111111,504,18,-0.61955435323715,0.033058366805315,65503.937287807 25 | 24,-24.214285714286,528,14,-0.56297829425335,0.035921178489923,68542.928196669 26 | 25,-21.807692307692,463,13,-0.56452477037907,0.03643785020709,71586.370691776 27 | 26,-17.3,492,15,-0.51090985023975,0.035625722199678,74622.337754726 28 | 27,-20.346153846154,444,13,-0.43836925184727,0.03375756187737,77666.558803558 29 | 28,-22.55,399,10,-0.43404640996456,0.031462731868029,80709.149359703 30 | 29,-23.2,396,10,-0.41233416175842,0.035647051796317,83754.154893875 31 | 30,-23.431818181818,415,11,-0.38006203866005,0.032858549267054,86794.799460888 32 | 31,-23.125,403,10,-0.35445768916607,0.031531541958451,89839.270530939 33 | 32,-23,391,10,-0.30434643936157,0.034153116598725,92875.915857077 34 | 33,-22.75,396,10,-0.28986469483376,0.035212990507483,95919.116505146 35 | 34,-24.575,382,10,-0.26612943577766,0.032227400070056,98955.592324972 36 | 35,-23.25,392,13,-0.26001595187187,0.033032483366318,102001.58251119 37 | 36,-24.8,382,10,-0.27397701132298,0.041171750120819,105041.78489709 38 | 37,-24.229166666667,409,12,-0.25113397979736,0.033362254701555,108088.16415405 39 | 38,-22.725,389,10,-0.26228211903572,0.032622256435454,111126.96911716 40 | 39,-23.840909090909,367,11,-0.23275652563572,0.032861182622612,114174.376688 41 | 40,-24.472222222222,367,9,-0.23278466582298,0.028766998458654,117212.66692805 42 | 41,-24.333333333333,353,9,-0.23536169910431,0.031785745821893,120277.53845716 43 | 42,-22.75,365,9,-0.23552758347988,0.029502598330379,123339.66571617 44 | 43,-24.25,364,10,-0.26656704473495,0.032324158191681,126401.14022613 45 | 
44,-23.575,352,10,-0.23555584084988,0.028791207253933,129460.99070621 46 | 45,-24.3,354,10,-0.21958861577511,0.032881412982941,132517.0496223 47 | 46,-23.875,352,10,-0.21235245406628,0.032139036647975,135574.30851746 48 | 47,-23.5,370,10,-0.19236077666283,0.029558058961295,138640.52359223 49 | 48,-24.5,333,9,-0.20597528707981,0.029206026017666,141701.27262425 50 | 49,-24.6,354,10,-0.2066180062294,0.032127544768155,144765.86236525 51 | 50,-24.472222222222,329,9,-0.20180555462837,0.030132573431358,147825.13783622 52 | 51,-24.916666666667,368,9,-0.17858661842346,0.031521749011008,150881.76423216 53 | 52,-22.416666666667,358,9,-0.18114964962006,0.028377236567438,153922.41813803 54 | 53,-23.5,344,10,-0.18467670333385,0.028995035981759,156972.89826202 55 | 54,-23.277777777778,344,9,-0.1676502161026,0.029776763111586,160014.81617689 56 | 55,-24.083333333333,341,9,-0.1743116055727,0.027832619239576,163061.01360178 57 | 56,-22.388888888889,333,9,-0.16487859320641,0.027677313060616,166101.79607463 58 | 57,-22.96875,331,8,-0.1488944196701,0.027799856112804,169148.1457777 59 | 58,-24.166666666667,343,9,-0.13687021172047,0.027575396563159,172210.88137054 60 | 59,-23.111111111111,346,9,-0.15149549734592,0.02638567374018,175278.51135159 61 | 60,-23.4375,334,8,-0.14469087707996,0.028253893780871,178342.54405665 62 | 61,-23.75,329,9,-0.15923279857635,0.028194830140797,181408.60506177 63 | 62,-22.8125,330,8,-0.13924461603165,0.02555070837657,184469.28358388 64 | 63,-25.194444444444,342,9,-0.10861786818504,0.026180025865324,187535.17608976 65 | 64,-24.5,326,8,-0.092891018033028,0.030780095353723,190594.06337309 66 | 65,-24.333333333333,338,9,-0.086945027470589,0.03013668980822,193661.88482189 67 | 66,-23.625,322,8,-0.10515263080597,0.029240502325818,196724.74574399 68 | 67,-23.888888888889,327,9,-0.097472587585449,0.026558110132813,199790.20792198 69 | 68,-24.166666666667,326,9,-0.08769202375412,0.030075371234678,202857.40764117 70 | 
69,-23.1875,325,8,-0.089009696483612,0.030123330419883,205922.43129015 71 | 70,-24.375,349,10,-0.076255015015602,0.031035634227097,208987.50090623 72 | 71,-23.825,367,10,-0.082351405143738,0.026536399340257,212053.49057627 73 | 72,-22.222222222222,328,9,-0.10591663193703,0.027814963047858,215114.73465633 74 | 73,-24.5,326,8,-0.10250155234337,0.027341711520217,218183.40867519 75 | 74,-22.59375,303,8,-0.10880286765099,0.026504224833101,221227.51655221 76 | 75,-23.90625,324,8,-0.10862943220139,0.027639281807467,224292.29700541 77 | 76,-23.28125,304,8,-0.12024115896225,0.026453432897106,227351.26816535 78 | 77,-23.65625,293,8,-0.12196779179573,0.029779008532409,230415.81299853 79 | 78,-22.03125,313,8,-0.11778739881516,0.027597792163957,233474.80665731 80 | 79,-21.777777777778,318,9,-0.1045371119976,0.029160832708701,236540.2464354 81 | 80,-24.65625,311,8,-0.099472584724426,0.029115197545849,239600.92933631 82 | 81,-21.90625,312,8,-0.087682006239891,0.02894111315161,242665.77772045 83 | 82,-21.96875,311,8,-0.082646775841713,0.024405353136361,245729.7993753 84 | 83,-22.78125,309,8,-0.075779815554619,0.027971854389645,248799.43783331 85 | 84,-23.722222222222,316,9,-0.066599355697632,0.025790769956075,251859.77453947 86 | 85,-24.28125,311,8,-0.039750346183777,0.026809580738656,254927.8234973 87 | 86,-22.09375,302,8,-0.052736337065697,0.025766940789297,257985.59742713 88 | 87,-17.472222222222,327,9,-0.041633208632469,0.024616640799097,261037.21990418 89 | 88,-22.71875,307,8,-0.033747999787331,0.025986618170049,264102.1203053 90 | 89,-19.96875,307,8,-0.018715925335884,0.025599762066267,267167.29295135 91 | 90,-24.25,308,8,-0.016242628574371,0.029913896901067,270228.33542633 92 | 91,-24.025,348,10,-0.014869958758354,0.027084466501838,273296.45201826 93 | 92,-24.5,309,8,-0.018914625287056,0.02361085549579,276342.35304213 94 | 93,-21.75,304,8,-0.0033151375055313,0.024955295423511,279391.21635818 95 | 94,-23.90625,304,8,-0.0086528804302216,0.026489283834584,282433.16600633 96 | 
-------------------------------------------------------------------------------- /plots/Pong2Player025_history_B.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-18.3875,568,20,-0.047463739871979,0.019119021207094,0 3 | 2,-21,1808,86,-0.11923271918297,0.017456383809447,2145.1178991795 4 | 3,-21.36,595,25,-0.18175252383947,0.017800595074892,5047.8407013416 5 | 4,-17.3625,563,20,-0.24600265741348,0.017269527733326,7975.1648442745 6 | 5,-20.2125,559,20,-0.28691787457466,0.034651648104191,10931.119163036 7 | 6,-15.7734375,882,32,-0.30191461646557,0.034897827267647,13900.719771862 8 | 7,-21,1807,86,-0.31138520777225,0.037016086220741,16899.678385973 9 | 8,-21.059210526316,1643,76,-0.27793198502064,0.024718726277351,19909.909010172 10 | 9,-12.402173913043,554,23,-0.32790268707275,0.029424214139581,22939.618853092 11 | 10,-19.589285714286,493,14,-0.43140547662973,0.040308959618211,25973.788203239 12 | 11,-17.3875,566,20,-0.42355911284685,0.03875673687458,29014.436220407 13 | 12,-14.425925925926,729,27,-0.37147549843788,0.036230552509427,32054.2854774 14 | 13,-12.3,569,20,-0.40821820640564,0.03606599791348,35099.126308441 15 | 14,-8.554347826087,573,23,-0.45022543692589,0.047203338369727,38136.861510515 16 | 15,-14.875,687,22,-0.42852363598347,0.040189081296325,41182.874017715 17 | 16,-8.97,647,25,-0.48244062685966,0.036945415109396,44221.490203857 18 | 17,-21.368421052632,672,19,-0.48765191411972,0.035779230415821,47265.278861761 19 | 18,-7.81,597,25,-0.50453666257858,0.038317348569632,50302.363017797 20 | 19,-12.596153846154,755,26,-0.54132361984253,0.034977702558041,53345.915400743 21 | 20,-20.516666666667,500,15,-0.54874300312996,0.034060227304697,56382.528666735 22 | 21,-17.140625,554,16,-0.58508399951458,0.037577494502068,59421.726590633 23 | 22,-9.1195652173913,581,23,-0.51264390897751,0.037798419475555,62458.533436537 24 | 
23,-12.194444444444,504,18,-0.50770425796509,0.037071759358048,65503.937287807 25 | 24,-20.517857142857,528,14,-0.44087818443775,0.0358794843629,68542.928196669 26 | 25,-22.038461538462,463,13,-0.41854939472675,0.035565362289548,71586.370691776 27 | 26,-21.7,492,15,-0.37197616314888,0.033765547022223,74622.337754726 28 | 27,-21.961538461538,444,13,-0.30863987386227,0.037052399717271,77666.558803558 29 | 28,-22.7,399,10,-0.30608900702,0.035842268519104,80709.149359703 30 | 29,-24.925,396,10,-0.31908070909977,0.037388368692249,83754.154893875 31 | 30,-21.795454545455,415,11,-0.33511826062202,0.03828813894093,86794.799460888 32 | 31,-23.875,403,10,-0.31773330914974,0.043416036142036,89839.270530939 33 | 32,-22.625,391,10,-0.29725316429138,0.037307736651972,92875.915857077 34 | 33,-23.875,396,10,-0.27369178593159,0.037329317059368,95919.116505146 35 | 34,-20.675,382,10,-0.23598987996578,0.039341125553474,98955.592324972 36 | 35,-14.25,392,13,-0.23570065188408,0.0370699272817,102001.58251119 37 | 36,-22.7,382,10,-0.22717701399326,0.0369602432549,105041.78489709 38 | 37,-18.166666666667,409,12,-0.20643604362011,0.033634546946734,108088.16415405 39 | 38,-21.9,389,10,-0.19385940217972,0.040584698152728,111126.96911716 40 | 39,-16.613636363636,367,11,-0.17805164909363,0.036598243892659,114174.376688 41 | 40,-21.222222222222,367,9,-0.16340972304344,0.0363655543942,117212.66692805 42 | 41,-21.916666666667,353,9,-0.13660684776306,0.03570253569819,120277.53845716 43 | 42,-22.666666666667,365,9,-0.12664770591259,0.030618733669631,123339.66571617 44 | 43,-18.625,364,10,-0.11581337034702,0.035453151194379,126401.14022613 45 | 44,-16.675,352,10,-0.13645259201527,0.032029403009452,129460.99070621 46 | 45,-19.575,354,10,-0.11208348071575,0.039308571979403,132517.0496223 47 | 46,-18.25,352,10,-0.077307417631149,0.034906720773317,135574.30851746 48 | 47,-20.5,370,10,-0.067142282962799,0.032025041517802,138640.52359223 49 | 
48,-20.083333333333,333,9,-0.050315158486366,0.032454478519969,141701.27262425 50 | 49,-19.65,354,10,-0.029398714721203,0.032754215477034,144765.86236525 51 | 50,-19.555555555556,329,9,-0.061822235167027,0.037998179838527,147825.13783622 52 | 51,-21.333333333333,368,9,-0.081514114260674,0.031815447958186,150881.76423216 53 | 52,-23,358,9,-0.058856061398983,0.034865049999207,153922.41813803 54 | 53,-18.625,344,10,-0.065357940196991,0.035583984130528,156972.89826202 55 | 54,-23.111111111111,344,9,-0.081974079191685,0.033413144109771,160014.81617689 56 | 55,-19.25,341,9,-0.097408960103989,0.032794755496085,163061.01360178 57 | 56,-23.305555555556,333,9,-0.088108561575413,0.03458730529435,166101.79607463 58 | 57,-24.375,331,8,-0.099771278560162,0.035198075203225,169148.1457777 59 | 58,-22.5,343,9,-0.11109308534861,0.032190475354437,172210.88137054 60 | 59,-22.027777777778,346,9,-0.080554970800877,0.032436789471656,175278.51135159 61 | 60,-23.4375,334,8,-0.079262801647186,0.029142876271158,178342.54405665 62 | 61,-21.25,329,9,-0.082398749470711,0.034636469038203,181408.60506177 63 | 62,-23.28125,330,8,-0.063544670343399,0.03176398887625,184469.28358388 64 | 63,-22.027777777778,342,9,-0.066933371722698,0.031416806509718,187535.17608976 65 | 64,-21.125,326,8,-0.054739053189754,0.030579170981422,190594.06337309 66 | 65,-19.833333333333,338,9,-0.05774846214056,0.030011123730801,193661.88482189 67 | 66,-22.78125,322,8,-0.081116970300674,0.02639879387524,196724.74574399 68 | 67,-19.722222222222,327,9,-0.068454475164413,0.030450439533219,199790.20792198 69 | 68,-19.583333333333,326,9,-0.045518710792065,0.029313153597061,202857.40764117 70 | 69,-22.4375,325,8,-0.064951929867268,0.028022845759057,205922.43129015 71 | 70,-18.75,349,10,-0.043183997392654,0.028989451053552,208987.50090623 72 | 71,-20.3,367,10,-0.025602949619293,0.029179670328042,212053.49057627 73 | 72,-23.055555555556,328,9,-0.041780498087406,0.030262483103201,215114.73465633 74 | 
73,-21.59375,326,8,-0.065926621258259,0.026754813395441,218183.40867519 75 | 74,-22.875,303,8,-0.062578474581242,0.026043576397933,221227.51655221 76 | 75,-22.03125,324,8,-0.073804795444012,0.029437688516453,224292.29700541 77 | 76,-22.34375,304,8,-0.067518562853336,0.027564628321677,227351.26816535 78 | 77,-21.5,293,8,-0.068931222259998,0.029230595620349,230415.81299853 79 | 78,-23.4375,313,8,-0.078792982280254,0.028379039938562,233474.80665731 80 | 79,-22.111111111111,318,9,-0.055398044526577,0.029704592891037,236540.2464354 81 | 80,-20.34375,311,8,-0.058394014656544,0.026572366565699,239600.92933631 82 | 81,-24.34375,312,8,-0.042984465479851,0.029344257420395,242665.77772045 83 | 82,-23.65625,311,8,-0.032085248291492,0.026987537696492,245729.7993753 84 | 83,-24.09375,309,8,-0.026669816911221,0.028634731254075,248799.43783331 85 | 84,-19.055555555556,316,9,-0.023739550888538,0.027961498648685,251859.77453947 86 | 85,-20.25,311,8,-0.02610385787487,0.028155951410998,254927.8234973 87 | 86,-24.625,302,8,-0.036636304974556,0.029357400156558,257985.59742713 88 | 87,-24.055555555556,327,9,-0.041067724645138,0.029178894826677,261037.21990418 89 | 88,-22.90625,307,8,-0.023426564395428,0.028285281694261,264102.1203053 90 | 89,-24.09375,307,8,-0.02995936101675,0.028055407757871,267167.29295135 91 | 90,-19.1875,308,8,-0.037269575536251,0.027009723492898,270228.33542633 92 | 91,-17.35,348,10,-0.051642109036446,0.028042263409123,273296.45201826 93 | 92,-22.0625,309,8,-0.035263738811016,0.027735443314305,276342.35304213 94 | 93,-21.84375,304,8,-0.015153986394405,0.028466824442614,279391.21635818 95 | 94,-19.6875,304,8,-0.015177275419235,0.026481435511378,282433.16600633 96 | -------------------------------------------------------------------------------- /plots/Pong2Player025_meanq.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_meanq.png -------------------------------------------------------------------------------- /plots/Pong2Player025_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player025_tderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_tderror.png -------------------------------------------------------------------------------- /plots/Pong2Player025_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025_time.png -------------------------------------------------------------------------------- /plots/Pong2Player025p.csv: -------------------------------------------------------------------------------- 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB 2 | 49;1;177;156;32;28400;-5,75;-18,25 3 | 49;2;226;191;38;45716;-15,5;-13 4 | 49;3;212;181;39;45024;-16,5;-12,75 5 | 49;4;159;155;32;40044;-5,75;-18,25 6 | 49;5;165;138;38;44924;-11,75;-16,75 7 | 49;6;136;119;29;28016;-2,75;-19 8 | 49;7;150;129;33;36964;-6,75;-18 9 | 49;8;188;180;39;32700;-12,75;-16,5 10 | 49;9;160;160;34;46008;-7,75;-17,75 11 | 49;10;170;162;36;38864;-9,75;-17,25 12 | -------------------------------------------------------------------------------- 
/plots/Pong2Player025p_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_average_reward.png -------------------------------------------------------------------------------- /plots/Pong2Player025p_episode_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player025p_history_A.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-12.352941176471,547,17,0.00082937029004097,0.027333959957585,0 3 | 2,3.28,568,25,-0.015955855160952,0.024399156646803,1853.2487101555 4 | 3,-10.745,1587,50,-0.04002983725071,0.023911268580705,4738.9991998672 5 | 4,-13.666666666667,576,21,-0.021115227997303,0.027273571142927,7648.6803650856 6 | 5,4.9786585365854,1740,82,-0.052541925430298,0.026042224757373,10568.621224165 7 | 6,-2.2045454545455,567,22,-0.047908736944199,0.028969800889492,13514.659698248 8 | 7,-14.1484375,923,32,-0.050299361824989,0.028472188197076,16481.173532248 9 | 8,1,582,24,-0.022397070169449,0.02070509136375,19472.213679314 10 | 9,-19.891304347826,586,23,-0.041784821867943,0.024664424210321,22485.965064049 11 | 10,-14.4375,567,20,-0.063855166316032,0.030764868427068,25520.983683109 12 | 11,-19.011363636364,553,22,-0.09504049217701,0.03342449611146,28549.404037952 13 | 12,-16.329545454545,726,22,-0.12252291846275,0.03835753168771,31593.315760136 14 | 13,-9.8425925925926,850,27,-0.12895621919632,0.034671363378526,34623.699748039 15 | 
14,-6.1666666666667,517,15,-0.14886118376255,0.032452720825095,37662.579718828 16 | 15,-17.038461538462,728,26,-0.13114638340473,0.033712715710048,40696.674022913 17 | 16,-19,566,19,-0.19647386515141,0.03248389304895,43748.12767005 18 | 17,-19.913265306122,1244,49,-0.24450411760807,0.035744567394257,46811.069835186 19 | 18,-17.763157894737,1005,38,-0.29554573619366,0.03675818355009,49869.176441431 20 | 19,-19.454545454545,598,22,-0.27713277065754,0.056914875906426,52920.243164301 21 | 20,-14.986111111111,714,18,-0.28785525941849,0.061606936244294,55978.05390048 22 | 21,-12.513157894737,704,19,-0.31740967047215,0.040825152235106,59030.078924179 23 | 22,-19.875,1139,44,-0.31587430310249,0.041830532593653,62078.087513208 24 | 23,-11.973684210526,709,19,-0.30709554183483,0.036709113527089,65080.992939234 25 | 24,-10.342105263158,598,19,-0.33734118914604,0.040537785271183,68080.805202246 26 | 25,-6.3214285714286,929,28,-0.27032305586338,0.037287141215988,71091.365906477 27 | 26,-16.111111111111,696,18,-0.21350250959396,0.036020501473453,74456.818608284 28 | 27,-16.039473684211,691,19,-0.20027073895931,0.037381986066233,77459.906303406 29 | 28,-18.794642857143,847,28,-0.16577923822403,0.036279377653729,80472.901323557 30 | 29,-16.321428571429,519,14,-0.16614373016357,0.036354737143032,83495.177422523 31 | 30,-17.575,686,20,-0.1629597299099,0.04528989072144,86509.107819557 32 | 31,-11.5,427,11,-0.13156876277924,0.037213211272843,89516.892535448 33 | 32,-11.416666666667,439,12,-0.12611813902855,0.035730781964026,92509.944949389 34 | 33,-15.692307692308,493,13,-0.11401822388172,0.034237996558892,95515.364245415 35 | 34,-16.692307692308,500,13,-0.050775911331177,0.037290098330937,98538.464844465 36 | 35,-10.134615384615,490,13,0.011082951903343,0.035221740954556,101545.67380142 37 | 36,-13.3125,489,12,0.05101346039772,0.033417437257245,104565.05976629 38 | 37,-15.522727272727,415,11,0.05477943277359,0.035748205714393,107557.31085825 39 | 
38,-11.769230769231,449,13,0.03531555891037,0.030885013681836,110563.57991529 40 | 39,-12.308823529412,643,17,0.0051241598129272,0.030626845588908,113553.20760727 41 | 40,-15.340909090909,439,11,0.024405554294586,0.034930394763709,116573.10384536 42 | 41,-9.55,539,15,0.015237793087959,0.031570149709005,119602.37818432 43 | 42,-13.388888888889,360,9,0.020310340762138,0.033570738155628,122643.66008639 44 | 43,-11.791666666667,481,12,0.044084054112434,0.03250115283113,125665.90826011 45 | 44,-12,420,11,0.064705698490143,0.033147096188273,128680.62530828 46 | 45,-14.068181818182,407,11,0.068985550761223,0.033349006434903,131705.14903736 47 | 46,-14.525,372,10,0.051267961621284,0.032088694971055,134712.71475434 48 | 47,-18.1875,396,12,0.050818180918694,0.050975416313857,137733.19209051 49 | 48,-13.444444444444,374,9,0.061889111042023,0.034130853615701,140749.8140893 50 | 49,-14.925,380,10,0.042458220124245,0.034601580746938,143770.8537302 51 | 50,-13.527777777778,346,9,0.024435671687126,0.028946696706116,146801.87147307 52 | 51,-15.25,478,12,0.080733409762383,0.030970462821424,149838.03255987 53 | 52,-17,374,10,0.10193769836426,0.031945135511458,152843.86857486 54 | 53,-16.625,361,10,0.093968224525452,0.031251499648206,155836.19525099 55 | 54,-15.75,374,10,0.097057873010635,0.034328001841903,158878.77326703 56 | 55,-15.636363636364,413,11,0.11512637388706,0.030982962438837,161907.93762589 57 | 56,-15.95,359,10,0.13147314918041,0.029474855260924,164959.00357485 58 | 57,-13.65,376,10,0.11861356890202,0.031835000259802,167964.01353598 59 | 58,-14.916666666667,360,9,0.095459250211716,0.032202778627165,170963.46770382 60 | 59,-14.3,374,10,0.073190112948418,0.033053754236549,173959.79004407 61 | 60,-17.175,359,10,0.071609259605408,0.035708082264289,176995.46294403 62 | 61,-19.216666666667,429,15,0.033061665534973,0.036125382841565,180024.85727596 63 | 62,-13.181818181818,419,11,0.071844058156013,0.032730900298804,183063.08219004 64 | 
63,-14.3,364,10,0.075431159377098,0.035262108405586,186085.26321888 65 | 64,-12.8,364,10,0.12851213681698,0.030993033098057,189114.62298989 66 | 65,-11.625,365,10,0.12246531331539,0.026319333478808,192137.97750092 67 | 66,-12.694444444444,361,9,0.10268555891514,0.033918607313186,195184.31896782 68 | 67,-13.138888888889,330,9,0.11241964411736,0.031062320400029,198208.45438385 69 | 68,-10.027777777778,314,9,0.11970028889179,0.030205767504871,201264.16876793 70 | 69,-17.15,361,10,0.10617531645298,0.030443864509463,204330.46117401 71 | 70,-13.611111111111,343,9,0.13672581923008,0.030963398275897,207367.36512089 72 | 71,-10.916666666667,340,9,0.13866924703121,0.02808295147866,210394.67483687 73 | 72,-13.111111111111,335,9,0.15276759397984,0.03073479475826,213406.28694057 74 | 73,-14,330,9,0.14818157732487,0.030045662116259,216401.18172574 75 | 74,-11.65,366,10,0.15149701762199,0.0306292267479,219403.89151478 76 | 75,-14.78125,318,8,0.14722499918938,0.032621489584446,222408.20259905 77 | 76,-13.527777777778,340,9,0.15238605070114,0.028705019278452,225461.34652686 78 | 77,-16.625,324,8,0.15269427335262,0.027108968123794,228506.12891603 79 | 78,-11.9375,319,8,0.14510065102577,0.02813640839234,231499.46051288 80 | 79,-14.611111111111,357,9,0.1547291328907,0.030465177953243,234527.91388178 81 | 80,-14.09375,320,8,0.16062089204788,0.028384334482253,237533.10557556 82 | 81,-13.5,461,12,0.16015643727779,0.029461187843233,240559.18904567 83 | 82,-15.388888888889,326,9,0.15006931102276,0.029733455169946,243585.51751566 84 | 83,-8.25,374,10,0.16304328477383,0.028653175026178,246613.00346756 85 | 84,-15,404,10,0.16597410941124,0.030505876637995,249631.14078665 86 | 85,-14.777777777778,363,9,0.1793319426775,0.028244167033583,252641.4623425 87 | 86,-12.818181818182,403,11,0.19172694313526,0.026959577117115,255642.5714035 88 | 87,-13.75,333,8,0.17769673538208,0.02880460199574,258670.40927029 89 | 88,-13.027777777778,355,9,0.17875989842415,0.027611443854868,261682.58066964 90 | 
89,-14.555555555556,329,9,0.17230091142654,0.026754371542484,264715.41334033 91 | 90,-12.25,335,9,0.18951486063004,0.03095425176248,267752.74594522 92 | 91,-13.7,387,10,0.22818215501308,0.032679638968781,270781.42519116 93 | 92,-12.416666666667,338,9,0.227643846035,0.027799513872713,273799.21518421 94 | 93,-13.35,374,10,0.23371605634689,0.030105194956064,276822.62628102 95 | 94,-16.275,363,10,0.23556252193451,0.029598393268883,279846.54929209 96 | 95,-16.1875,317,8,0.24673552370071,0.028603928070515,282891.81562114 97 | 96,-13.555555555556,362,9,0.23167600369453,0.028245551455766,285943.53358817 98 | 97,-15.35,400,10,0.21881948363781,0.028914291147143,288987.05976105 99 | 98,-11.444444444444,349,9,0.23058039593697,0.027175322085619,292021.26357698 100 | 99,-12.975,368,10,0.22368267905712,0.026793881300837,295034.41267991 101 | -------------------------------------------------------------------------------- /plots/Pong2Player025p_history_B.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-11.25,547,17,-0.022026317223907,0.020198165625334,0 3 | 2,-20.17,568,25,-0.04641988992691,0.013739513330162,1853.2487101555 4 | 3,-12.595,1587,50,-0.064600818932056,0.029137924715877,4738.9991998672 5 | 4,-6.5833333333333,576,21,-0.051100649297237,0.023595076441765,7648.6803650856 6 | 5,-20.875,1740,82,-0.045074989318848,0.022099993273616,10568.621224165 7 | 6,-17.090909090909,567,22,-0.016678276002407,0.019567097578198,13514.659698248 8 | 7,-7.390625,923,32,-0.013433264136314,0.035526426170021,16481.173532248 9 | 8,-18.6875,582,24,-0.047428324580193,0.024294663522393,19472.213679314 10 | 9,0.81521739130435,586,23,-0.11416205596924,0.025039088575053,22485.965064049 11 | 10,-6.1875,567,20,-0.12096249431372,0.035912665607873,25520.983683109 12 | 11,0.875,553,22,-0.11538840609789,0.032637817841955,28549.404037952 13 | 
12,-7.125,726,22,-0.12101293635368,0.03908227335359,31593.315760136 14 | 13,-13.407407407407,850,27,-0.14093002921343,0.041807466906495,34623.699748039 15 | 14,-18.083333333333,517,15,-0.20034552669525,0.056665569891222,37662.579718828 16 | 15,-3.3846153846154,728,26,-0.265248480618,0.044714272328652,40696.674022913 17 | 16,-2.75,566,19,-0.25556861174107,0.049150885151234,43748.12767005 18 | 17,0.9030612244898,1244,49,-0.24537132835388,0.045838969773613,46811.069835186 19 | 18,-1.875,1005,38,-0.20086278235912,0.049287679118104,49869.176441431 20 | 19,-0.93181818181818,598,22,-0.15542751729488,0.04646459036693,52920.243164301 21 | 20,-13.180555555556,714,18,-0.15927488678694,0.046686877326109,55978.05390048 22 | 21,-14.486842105263,704,19,-0.16698158144951,0.044650202055927,59030.078924179 23 | 22,0.75,1139,44,-0.093899932324886,0.043945835517719,62078.087513208 24 | 23,-14.671052631579,709,19,-0.062830573558807,0.045137236217037,65080.992939234 25 | 24,-12.710526315789,598,19,-0.033643757283688,0.045228893704712,68080.805202246 26 | 25,-17.973214285714,929,28,0.014084318518639,0.047282483488321,71091.365906477 27 | 26,-11.805555555556,696,18,0.048644723296165,0.045284945465624,74456.818608284 28 | 27,-11.039473684211,691,19,0.14962560451031,0.041659969978034,77459.906303406 29 | 28,-3.3035714285714,847,28,0.091666147172451,0.041126533688977,80472.901323557 30 | 29,-10.517857142857,519,14,0.092228712022305,0.04186979227839,83495.177422523 31 | 30,-7.5125,686,20,0.12615469282866,0.038390983216465,86509.107819557 32 | 31,-16.045454545455,427,11,0.16480913150311,0.038114958232269,89516.892535448 33 | 32,-15.895833333333,439,12,0.17521106117964,0.040174690205604,92509.944949389 34 | 33,-12.519230769231,493,13,0.19621461397409,0.044320241136476,95515.364245415 35 | 34,-10.826923076923,500,13,0.16478013241291,0.03882261980325,98538.464844465 36 | 35,-17.153846153846,490,13,0.14321630263329,0.041511953729205,101545.67380142 37 | 
36,-15.5,489,12,0.11221461379528,0.04190587349236,104565.05976629 38 | 37,-11.886363636364,415,11,0.098011219799519,0.037842316889204,107557.31085825 39 | 38,-13.788461538462,449,13,0.1094850730896,0.036203425718471,110563.57991529 40 | 39,-14.514705882353,643,17,0.096934471487999,0.037548641140573,113553.20760727 41 | 40,-12.613636363636,439,11,0.12856424659491,0.035460023349151,116573.10384536 42 | 41,-17.3,539,15,0.14773236596584,0.034919066805858,119602.37818432 43 | 42,-14.777777777778,360,9,0.13221014523506,0.035461233332753,122643.66008639 44 | 43,-16.583333333333,481,12,0.1067987023592,0.031970723539591,125665.90826011 45 | 44,-15.409090909091,420,11,0.13014798927307,0.035180839031935,128680.62530828 46 | 45,-13.272727272727,407,11,0.096371849060059,0.035471203010529,131705.14903736 47 | 46,-12.4,372,10,0.12734847903252,0.034236836878583,134712.71475434 48 | 47,-6,396,12,0.1212975230813,0.030816859727725,137733.19209051 49 | 48,-15.805555555556,374,9,0.1186571495533,0.035329016719013,140749.8140893 50 | 49,-13.425,380,10,0.086061958432198,0.0307808713587,143770.8537302 51 | 50,-15.055555555556,346,9,0.090420981049538,0.032709428630769,146801.87147307 52 | 51,-12.4375,478,12,0.11497986483574,0.031309688353911,149838.03255987 53 | 52,-10.375,374,10,0.11617774283886,0.032193318966776,152843.86857486 54 | 53,-10.375,361,10,0.1357577880621,0.036922869921662,155836.19525099 55 | 54,-10.875,374,10,0.14001373767853,0.039279997609556,158878.77326703 56 | 55,-11.772727272727,413,11,0.13920881605148,0.030879472235218,161907.93762589 57 | 56,-10.45,359,10,0.13384447109699,0.034293823663145,164959.00357485 58 | 57,-14.4,376,10,0.12579261171818,0.032977014446631,167964.01353598 59 | 58,-12.833333333333,360,9,0.13741466021538,0.034957044512033,170963.46770382 60 | 59,-13.675,374,10,0.13554467487335,0.032587719732895,173959.79004407 61 | 60,-9.675,359,10,0.1283846218586,0.03228870106861,176995.46294403 62 | 
61,-1.8833333333333,429,15,0.10752998292446,0.033881811052561,180024.85727596 63 | 62,-14.431818181818,419,11,0.13602916538715,0.031183257451281,183063.08219004 64 | 63,-12.55,364,10,0.1411640816927,0.035830748300999,186085.26321888 65 | 64,-13.675,364,10,0.13390322732925,0.03039492443949,189114.62298989 66 | 65,-15,365,10,0.16171702218056,0.031493642576039,192137.97750092 67 | 66,-16.305555555556,361,9,0.14895478498936,0.028515401256271,195184.31896782 68 | 67,-14.111111111111,330,9,0.14906409704685,0.034287901156582,198208.45438385 69 | 68,-15.722222222222,314,9,0.16527233350277,0.031774870052934,201264.16876793 70 | 69,-9.025,361,10,0.1847889149189,0.0319969650805,204330.46117401 71 | 70,-14.305555555556,343,9,0.18708206498623,0.029576429188251,207367.36512089 72 | 71,-16.75,340,9,0.19180698692799,0.028971399366856,210394.67483687 73 | 72,-14.222222222222,335,9,0.23490184652805,0.034007699653506,213406.28694057 74 | 73,-12.75,330,9,0.22783434331417,0.029517960537225,216401.18172574 75 | 74,-14.15,366,10,0.2203871024847,0.029430883809924,219403.89151478 76 | 75,-13.0625,318,8,0.22859213721752,0.029511279068887,222408.20259905 77 | 76,-13.805555555556,340,9,0.20844653385878,0.027854178383946,225461.34652686 78 | 77,-10.84375,324,8,0.20289381372929,0.02774667654559,228506.12891603 79 | 78,-16.46875,319,8,0.16040451908112,0.030247389111668,231499.46051288 80 | 79,-12.805555555556,357,9,0.14438503944874,0.030913321133703,234527.91388178 81 | 80,-13.9375,320,8,0.16219390559196,0.032817014321685,237533.10557556 82 | 81,-13.5,461,12,0.1669005920887,0.029075888812542,240559.18904567 83 | 82,-10.944444444444,326,9,0.14216573596001,0.028492409624159,243585.51751566 84 | 83,-17.625,374,10,0.17459906148911,0.029121529079974,246613.00346756 85 | 84,-13.125,404,10,0.16748613035679,0.028448160957545,249631.14078665 86 | 85,-12.138888888889,363,9,0.17306335294247,0.029834900505841,252641.4623425 87 | 86,-13.840909090909,403,11,0.18577182674408,0.03069648785796,255642.5714035 88 
| 87,-14.375,333,8,0.16826687574387,0.027692098695785,258670.40927029 89 | 88,-14.972222222222,355,9,0.15338295567036,0.030362812533975,261682.58066964 90 | 89,-12.611111111111,329,9,0.15686451637745,0.031322851724923,264715.41334033 91 | 90,-14.75,335,9,0.17532988417149,0.028876138456166,267752.74594522 92 | 91,-13.075,387,10,0.14850094485283,0.029779071172699,270781.42519116 93 | 92,-14.5,338,9,0.14405781364441,0.026694667607546,273799.21518421 94 | 93,-13.35,374,10,0.17320177900791,0.028948133006692,276822.62628102 95 | 94,-10.275,363,10,0.18066636753082,0.027836792185903,279846.54929209 96 | 95,-12.125,317,8,0.20752841413021,0.027108559463173,282891.81562114 97 | 96,-13.694444444444,362,9,0.18776108336449,0.027645866759121,285943.53358817 98 | 97,-13.225,400,10,0.18295483493805,0.028122058689594,288987.05976105 99 | 98,-15.472222222222,349,9,0.1813165242672,0.026570522945374,292021.26357698 100 | 99,-14.1,368,10,0.16999678957462,0.030394074514508,295034.41267991 101 | -------------------------------------------------------------------------------- /plots/Pong2Player025p_meanq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_meanq.png -------------------------------------------------------------------------------- /plots/Pong2Player025p_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player025p_tderror.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_tderror.png -------------------------------------------------------------------------------- /plots/Pong2Player025p_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player025p_time.png -------------------------------------------------------------------------------- /plots/Pong2Player05.csv: -------------------------------------------------------------------------------- 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB 2 | 49;1;72;188;31;47872;-23,5;-23 3 | 49;2;65;184;31;47860;-24;-22,5 4 | 49;3;76;165;27;49880;-20,5;-20 5 | 49;4;73;185;29;48308;-22,5;-21 6 | 49;5;72;227;36;44252;-28;-26 7 | 49;6;42;120;20;54200;-14,5;-15,5 8 | 49;7;81;188;31;48052;-23,5;-23 9 | 49;8;65;186;30;48032;-22,5;-22,5 10 | 49;9;57;152;24;51260;-19,5;-16,5 11 | 49;10;86;205;31;46752;-24,5;-22 12 | -------------------------------------------------------------------------------- /plots/Pong2Player05_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_average_reward.png -------------------------------------------------------------------------------- /plots/Pong2Player05_episode_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player05_history_A.csv: 
-------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-23.565789473684,1060,38,-0.067710862189531,0.034837675005198,0 3 | 2,-22.15,567,20,-0.15684062993526,0.038721079081297,2135.1606602669 4 | 3,-21.365384615385,773,26,-0.25446475175023,0.042217464238405,5015.7156863213 5 | 4,-10.5,1810,86,-0.30862446391582,0.046876634538174,7923.8975212574 6 | 5,-11.36,556,25,-0.35418861603737,0.043869868814945,10845.98303318 7 | 6,-13.795454545455,553,22,-0.35598142755032,0.036427813351154,13779.444419146 8 | 7,-14.739130434783,545,23,-0.38907803559303,0.038076591491699,16730.656761169 9 | 8,-10.571428571429,1786,84,-0.40729899406433,0.038895908117294,19701.781215191 10 | 9,-12.2,1143,50,-0.50314592260122,0.041101832091808,22691.036135197 11 | 10,-17.723684210526,1062,38,-0.50624308091402,0.035921704351902,25693.220480204 12 | 11,-23.466666666667,906,30,-0.54614569222927,0.045043568909168,28695.295013189 13 | 12,-23.625,542,20,-0.60700052964687,0.035269764125347,31699.539398193 14 | 13,-13.863636363636,544,22,-0.61513235259056,0.033319144189358,34705.274416924 15 | 14,-23.578947368421,503,19,-0.59529578441381,0.031388927638531,37707.174799919 16 | 15,-27.769230769231,473,13,-0.58331776094437,0.034846514195204,40709.99874115 17 | 16,-23.35,530,20,-0.57454408812523,0.04202400124073,43711.185202837 18 | 17,-23.333333333333,547,21,-0.58036920952797,0.035804619640112,46715.193037987 19 | 18,-26.153846153846,473,13,-0.55386888027191,0.031593176573515,49718.601107836 20 | 19,-23.607142857143,507,14,-0.61488328480721,0.039815872758627,52720.651567936 21 | 20,-29.727272727273,454,11,-0.5656794731617,0.039149179130793,55723.797693014 22 | 21,-25.791666666667,455,12,-0.54490308320522,0.036643647104502,58725.664307117 23 | 22,-24.115384615385,465,13,-0.5066512928009,0.040177868783474,61731.177309275 24 | 23,-26.409090909091,417,11,-0.4840882229805,0.039173430681229,64735.67158699 25 | 
24,-29.045454545455,425,11,-0.45775239634514,0.037823288202286,67730.783874989 26 | 25,-27.181818181818,421,11,-0.47300417554379,0.038609686613083,70763.969098806 27 | 26,-25.884615384615,458,13,-0.45065103256702,0.038901566147804,73791.41166997 28 | 27,-27.35,371,10,-0.50865966272354,0.048145369470119,76837.669150829 29 | 28,-28.409090909091,400,11,-0.47566920948029,0.038639131039381,79881.43487668 30 | 29,-26.272727272727,413,11,-0.47156949782372,0.044437561452389,82939.544976711 31 | 30,-27.7,405,10,-0.44191358315945,0.037687044411898,85980.046920538 32 | 31,-27.95,413,10,-0.4227322665453,0.038398827344179,89025.800366402 33 | 32,-27.954545454545,403,11,-0.44173110306263,0.036418485343456,92063.302292347 34 | 33,-28.75,387,10,-0.43116731095314,0.035739500701427,95107.170580387 35 | 34,-27.25,397,10,-0.4147837843895,0.03450074160099,98144.554581642 36 | 35,-27.590909090909,383,11,-0.41931443274021,0.034484442532063,101187.45636582 37 | 36,-28.083333333333,433,12,-0.41124797463417,0.037311921685934,104224.58697462 38 | 37,-28.409090909091,410,11,-0.40026402807236,0.036159102261066,107265.20277381 39 | 38,-27.95,392,10,-0.38983296847343,0.037148823037744,110301.78797388 40 | 39,-27.5,395,11,-0.38342036378384,0.035200391098857,113344.96769667 41 | 40,-26.727272727273,379,11,-0.41330251276493,0.04031700232625,116381.36457777 42 | 41,-28.333333333333,357,9,-0.38927046716213,0.033651781499386,119425.28358245 43 | 42,-26.318181818182,376,11,-0.38498378384113,0.035465506106615,122464.9944644 44 | 43,-28.1,372,10,-0.37079898560047,0.036857142180204,125509.42432022 45 | 44,-26.45,351,10,-0.3712386251688,0.034880570158362,128547.34342337 46 | 45,-28.8,385,10,-0.35417822515965,0.033587526723742,131589.36552644 47 | 46,-26.909090909091,368,11,-0.32902699255943,0.03582143689692,134627.12757063 48 | 47,-27.65,362,10,-0.34593545436859,0.036205421105027,137668.57984638 49 | 48,-27.05,371,10,-0.31687743508816,0.034691168367863,140704.32613134 50 | 
49,-27.5,348,10,-0.32036348569393,0.036744026735425,143746.68808866 51 | 50,-28.222222222222,337,9,-0.30661095154285,0.032667124561965,146784.51735163 52 | 51,-27.15,339,10,-0.28461887812614,0.033706795692444,149827.91408563 53 | 52,-26.055555555556,328,9,-0.25844291603565,0.033171407248825,152864.17974257 54 | 53,-28.055555555556,337,9,-0.20742945981026,0.03217628205684,155907.42605758 55 | 54,-26.85,347,10,-0.22025959646702,0.034844072170556,158945.75263953 56 | 55,-29.333333333333,369,9,-0.19502455461025,0.034335268455092,161993.8620522 57 | 56,-27.611111111111,334,9,-0.19381985235214,0.03531576907495,165028.32910323 58 | 57,-28.3125,338,8,-0.20123857879639,0.033415464806138,168068.62531328 59 | 58,-27.222222222222,347,9,-0.21830435657501,0.032280782226182,171106.50689602 60 | 59,-28.5,367,9,-0.1951986335516,0.033280624641455,174149.95089102 61 | 60,-27.277777777778,342,9,-0.19836646234989,0.029407955253031,177187.28591776 62 | 61,-27.45,343,10,-0.21108313941956,0.03160761183966,180230.49803877 63 | 62,-27.15,348,10,-0.20018597757816,0.033951800141484,183269.95338082 64 | 63,-27.555555555556,318,9,-0.18895139217377,0.03070913996594,186314.83098292 65 | 64,-28,318,8,-0.18499350523949,0.029948033646564,189352.4879508 66 | 65,-27.8125,319,8,-0.19815232717991,0.030979598897276,192396.61688399 67 | 66,-29,331,8,-0.2042354580164,0.034979876468889,195436.35396481 68 | 67,-25.777777777778,340,9,-0.18630485057831,0.029006195977097,198481.27222991 69 | 68,-27.222222222222,347,9,-0.16837375974655,0.031752511370927,201521.16469193 70 | 69,-27.666666666667,343,9,-0.17215054941177,0.032913396080025,204565.77418089 71 | 70,-27.666666666667,326,9,-0.16846440696716,0.030616780133918,207606.91884899 72 | 71,-28.388888888889,350,9,-0.16197844016552,0.02940061276406,210652.6123538 73 | 72,-28.3125,331,8,-0.16286637437344,0.030343113770243,213691.91739488 74 | 73,-28.166666666667,322,9,-0.17059023880959,0.028513457268826,216737.55602193 75 | 
74,-28.4375,306,8,-0.16656371164322,0.029049847560003,219778.28505278 76 | 75,-28.555555555556,355,9,-0.17663783371449,0.028326682368293,222824.99759769 77 | 76,-25.5625,308,8,-0.16812364423275,0.028427452790784,225863.61633468 78 | 77,-28,313,8,-0.17012423825264,0.029179202558938,228906.90767956 79 | 78,-25.6,330,10,-0.15948258709908,0.029502675250173,231945.58412576 80 | 79,-27.15,340,10,-0.17163393580914,0.031709079549648,234990.06086373 81 | 80,-28.9375,309,8,-0.17488246464729,0.029555166413484,238032.13610172 82 | 81,-28.357142857143,305,7,-0.16523126077652,0.028102440752089,241078.97931576 83 | 82,-28.055555555556,349,9,-0.17386763191223,0.029353871364699,244120.02995491 84 | 83,-28.375,313,8,-0.19983051693439,0.031149187764386,247165.05484152 85 | 84,-26.625,307,8,-0.17532221388817,0.029301581036765,250204.44619155 86 | 85,-25.75,291,8,-0.17028263640404,0.028546538640745,253247.87504554 87 | 86,-27.714285714286,287,7,-0.16522393465042,0.031246546869166,256287.07864952 88 | 87,-26.5,289,8,-0.15511447227001,0.033023809224367,259332.19374633 89 | 88,-28.125,304,8,-0.15494792342186,0.029552731960081,262373.06913757 90 | 89,-24.5625,286,8,-0.14808536505699,0.027394249363802,265416.93225241 91 | 90,-27.5,292,8,-0.1341061425209,0.028037310602143,268457.4805975 92 | 91,-28.0625,287,8,-0.132041872859,0.028428069008514,271504.65070438 93 | 92,-29.5,293,7,-0.13780510485172,0.029911585349124,274544.11319828 94 | 93,-28.357142857143,291,7,-0.12779329800606,0.028778882302344,277592.01316142 95 | 94,-26.5,292,8,-0.12557261633873,0.028597356364131,280631.97059441 96 | 95,-24.5,274,7,-0.13175105500221,0.029224281116389,283677.72791409 97 | 96,-27,296,8,-0.11238371729851,0.028369416657835,286719.95823526 98 | 97,-27.625,285,8,-0.10372178995609,0.029483026944101,289765.93972826 99 | 98,-28.5,276,7,-0.10349846041203,0.02689660487324,292806.88323212 100 | 99,-27.714285714286,275,7,-0.089309795498848,0.030658142995089,295852.95120811 101 | 
-------------------------------------------------------------------------------- /plots/Pong2Player05_history_B.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-18.118421052632,1060,38,-0.066835069596767,0.02591877129674,0 3 | 2,-19.25,567,20,-0.18020111098886,0.02074752292037,2135.1606602669 4 | 3,-23,773,26,-0.23922270989418,0.02354782563448,5015.7156863213 5 | 4,-21,1810,86,-0.27857883024216,0.033000697731972,7923.8975212574 6 | 5,-21.28,556,25,-0.33738064062595,0.022831210553646,10845.98303318 7 | 6,-22.613636363636,553,22,-0.36825464832783,0.035995476484299,13779.444419146 8 | 7,-20.478260869565,545,23,-0.37289713126421,0.021897726356983,16730.656761169 9 | 8,-21.035714285714,1786,84,-0.43349884229898,0.038497683167458,19701.781215191 10 | 9,-21.85,1143,50,-0.62161118167639,0.033715693116188,22691.036135197 11 | 10,-23.842105263158,1062,38,-0.63965305072069,0.046260520637035,25693.220480204 12 | 11,-21.433333333333,906,30,-0.64699254095554,0.053769414335489,28695.295013189 13 | 12,-15.75,542,20,-0.66198631811142,0.041308878362179,31699.539398193 14 | 13,-22.272727272727,544,22,-0.66889968365431,0.043348840415478,34705.274416924 15 | 14,-15.657894736842,503,19,-0.67822147250175,0.034765207916498,37707.174799919 16 | 15,-24.038461538462,473,13,-0.69069452720881,0.044168668866158,40709.99874115 17 | 16,-15.2,530,20,-0.62808234894276,0.043116836935282,43711.185202837 18 | 17,-15.166666666667,547,21,-0.61980935025215,0.058740401506424,46715.193037987 19 | 18,-27.5,473,13,-0.60524166983366,0.044296518594027,49718.601107836 20 | 19,-27.178571428571,507,14,-0.55743600380421,0.039298461139202,52720.651567936 21 | 20,-30,454,11,-0.54200337249041,0.047728110402822,55723.797693014 22 | 21,-27.333333333333,455,12,-0.53479717481136,0.046402426108718,58725.664307117 23 | 22,-27.807692307692,465,13,-0.59352023053169,0.044323350936174,61731.177309275 24 | 
23,-27.181818181818,417,11,-0.57141543602943,0.048805070489645,64735.67158699 25 | 24,-28.363636363636,425,11,-0.55112496232986,0.046088153421879,67730.783874989 26 | 25,-28.454545454545,421,11,-0.52232072466612,0.043629713505507,70763.969098806 27 | 26,-25.461538461538,458,13,-0.51326719427109,0.042730105251074,73791.41166997 28 | 27,-23.95,371,10,-0.48516096317768,0.039971062466502,76837.669150829 29 | 28,-26,400,11,-0.47914367777109,0.04138122522831,79881.43487668 30 | 29,-25.954545454545,413,11,-0.51456713736057,0.04437631753087,82939.544976711 31 | 30,-28.85,405,10,-0.48131128048897,0.039471878141165,85980.046920538 32 | 31,-28.9,413,10,-0.41247966814041,0.043670239090919,89025.800366402 33 | 32,-25.909090909091,403,11,-0.4012852845192,0.03890059760958,92063.302292347 34 | 33,-26.15,387,10,-0.37383976536989,0.045206182360649,95107.170580387 35 | 34,-27.2,397,10,-0.34854346996546,0.035068557243794,98144.554581642 36 | 35,-23.681818181818,383,11,-0.38900647234917,0.035556656509638,101187.45636582 37 | 36,-24.791666666667,433,12,-0.3510031542182,0.03507110093534,104224.58697462 38 | 37,-26,410,11,-0.33158691716194,0.036370992548764,107265.20277381 39 | 38,-26.65,392,10,-0.346440944314,0.036729044564068,110301.78797388 40 | 39,-26.090909090909,395,11,-0.35682825219631,0.036668726488948,113344.96769667 41 | 40,-23.318181818182,379,11,-0.31966248720884,0.037916492588818,116381.36457777 42 | 41,-26,357,9,-0.31109520059824,0.036470890812576,119425.28358245 43 | 42,-23.045454545455,376,11,-0.29166408789158,0.039427526202053,122464.9944644 44 | 43,-25.3,372,10,-0.29205604588985,0.035345522232354,125509.42432022 45 | 44,-21.4,351,10,-0.29813618457317,0.035149823502637,128547.34342337 46 | 45,-26.4,385,10,-0.28132370436192,0.033947332246229,131589.36552644 47 | 46,-22.318181818182,368,11,-0.22567233264446,0.034842983233277,134627.12757063 48 | 47,-23.8,362,10,-0.23975029945374,0.041935967186466,137668.57984638 49 | 
48,-25.75,371,10,-0.21593493235111,0.035218815199565,140704.32613134 50 | 49,-24.25,348,10,-0.21577809095383,0.034836648333818,143746.68808866 51 | 50,-27.277777777778,337,9,-0.19347376406193,0.035338229847956,146784.51735163 52 | 51,-22.8,339,10,-0.18803761428595,0.034529903292656,149827.91408563 53 | 52,-24.777777777778,328,9,-0.18428690427542,0.035586897883564,152864.17974257 54 | 53,-26.277777777778,337,9,-0.16982698947191,0.037080030148849,155907.42605758 55 | 54,-22.8,347,10,-0.1805264787674,0.036383127157111,158945.75263953 56 | 55,-28,369,9,-0.19619113898277,0.039251363170333,161993.8620522 57 | 56,-23.722222222222,334,9,-0.16315816771984,0.035299850299722,165028.32910323 58 | 57,-27.5625,338,8,-0.17962335503101,0.036802242480684,168068.62531328 59 | 58,-25.444444444444,347,9,-0.18439667832851,0.038928838141263,171106.50689602 60 | 59,-27,367,9,-0.17241037005186,0.035762713017873,174149.95089102 61 | 60,-26.055555555556,342,9,-0.16759346574545,0.033813355355524,177187.28591776 62 | 61,-23.4,343,10,-0.17988516628742,0.035570928469766,180230.49803877 63 | 62,-24.75,348,10,-0.14266111797094,0.034371860966086,183269.95338082 64 | 63,-23.944444444444,318,9,-0.16104466897249,0.03449313522689,186314.83098292 65 | 64,-26.1875,318,8,-0.15914500325918,0.033191479151836,189352.4879508 66 | 65,-24.5,319,8,-0.16667608141899,0.03099166682642,192396.61688399 67 | 66,-28.1875,331,8,-0.17711827391386,0.034276712449268,195436.35396481 68 | 67,-28.222222222222,340,9,-0.1667426995039,0.041464160232805,198481.27222991 69 | 68,-26.444444444444,347,9,-0.15887207114697,0.03303444356285,201521.16469193 70 | 69,-24.666666666667,343,9,-0.16351392567158,0.034325236777309,204565.77418089 71 | 70,-25.333333333333,326,9,-0.16442284065485,0.037677678430919,207606.91884899 72 | 71,-28.111111111111,350,9,-0.15780285811424,0.033131264870055,210652.6123538 73 | 72,-26.25,331,8,-0.16163712793589,0.033998891314492,213691.91739488 74 | 
73,-24.833333333333,322,9,-0.15641409653425,0.033916867008433,216737.55602193 75 | 74,-26.5,306,8,-0.18017208111286,0.034290954723489,219778.28505278 76 | 75,-27.777777777778,355,9,-0.20218924224377,0.034520228855079,222824.99759769 77 | 76,-28.0625,308,8,-0.1884888920784,0.03200924059283,225863.61633468 78 | 77,-25.0625,313,8,-0.19379569894075,0.032447021281347,228906.90767956 79 | 78,-19.7,330,10,-0.16559742546082,0.032409753373824,231945.58412576 80 | 79,-22.8,340,10,-0.17583794850111,0.033432859681547,234990.06086373 81 | 80,-28.4375,309,8,-0.17665959024429,0.031948379773647,238032.13610172 82 | 81,-29.5,305,7,-0.18224990254641,0.03095829824172,241078.97931576 83 | 82,-27.777777777778,349,9,-0.18671671444178,0.029398809920065,244120.02995491 84 | 83,-28.8125,313,8,-0.16162476235628,0.032981981403194,247165.05484152 85 | 84,-26.25,307,8,-0.16804763782024,0.033460189212114,250204.44619155 86 | 85,-28.4375,291,8,-0.14077527183294,0.033014024126343,253247.87504554 87 | 86,-26.928571428571,287,7,-0.17255865395069,0.031409454064909,256287.07864952 88 | 87,-24.3125,289,8,-0.16105569815636,0.034144075133605,259332.19374633 89 | 88,-27.1875,304,8,-0.17937077999115,0.030995232569054,262373.06913757 90 | 89,-27.5625,286,8,-0.17962557536364,0.029845997454599,265416.93225241 91 | 90,-26.5,292,8,-0.15064305710793,0.032113508773036,268457.4805975 92 | 91,-24.8125,287,8,-0.1413662314415,0.029414703928109,271504.65070438 93 | 92,-28.571428571429,293,7,-0.14779610908031,0.032279317857698,274544.11319828 94 | 93,-28.214285714286,291,7,-0.14539888328314,0.030164272069,277592.01316142 95 | 94,-27.3125,292,8,-0.13699457347393,0.030221082478762,280631.97059441 96 | 95,-28,274,7,-0.15508052033186,0.029217592090368,283677.72791409 97 | 96,-24.375,296,8,-0.16006426233053,0.030165801534429,286719.95823526 98 | 97,-23.75,285,8,-0.13804284149408,0.033159527854994,289765.93972826 99 | 98,-29.142857142857,276,7,-0.13101159918308,0.029509387072641,292806.88323212 100 | 
99,-24.785714285714,275,7,-0.14010148745775,0.028426378406119,295852.95120811 101 | -------------------------------------------------------------------------------- /plots/Pong2Player05_meanq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_meanq.png -------------------------------------------------------------------------------- /plots/Pong2Player05_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player05_tderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_tderror.png -------------------------------------------------------------------------------- /plots/Pong2Player05_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05_time.png -------------------------------------------------------------------------------- /plots/Pong2Player05p_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_average_reward.png -------------------------------------------------------------------------------- /plots/Pong2Player05p_episode_counts.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player05p_history_A.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-12.190476190476,566,21,0.0016725579500198,0.029398417342454,0 3 | 2,-13.184210526316,544,19,0.0090621307790279,0.027209096094593,2521.3481161594 4 | 3,-4.625,746,24,0.0068334065973759,0.026827841966413,5990.4787950516 5 | 4,-8.5862068965517,817,29,0.026928184151649,0.028336934078485,9408.9113001823 6 | 5,8.6041666666667,554,24,0.029909803807735,0.031383067324758,12723.580034971 7 | 6,-12.421052631579,563,19,0.046930429577827,0.025520376451313,16028.401235104 8 | 7,-0.078947368421053,552,19,0.072756165266037,0.032294995471835,19352.323368073 9 | 8,9.6125,1759,80,0.092696217417717,0.034000042662024,22690.247012377 10 | 9,-10.340909090909,563,22,0.059791999518871,0.0315357653182,26035.510334253 11 | 10,-13.886363636364,727,22,0.041338154196739,0.050249467393383,29387.56384325 12 | 11,1.5555555555556,525,18,0.015745346724987,0.047766451098025,32737.631895304 13 | 12,-0.63157894736842,619,19,-0.0079051206707954,0.040352991356369,36082.453429222 14 | 13,-6.436170212766,1535,47,0.05316678750515,0.042652126014233,39442.614874363 15 | 14,-11.119047619048,1401,42,0.038818866789341,0.048164960654452,42395.264083385 16 | 15,-13.583333333333,1384,42,0.10071529769897,0.048555156112649,45346.400170326 17 | 16,3.4901960784314,1428,51,0.021332919180393,0.052922020385042,48300.30147934 18 | 17,-13.119047619048,670,21,0.048792710542679,0.05994572978653,51265.125099182 19 | 18,-8.7916666666667,825,24,0.15720679599047,0.049980003822595,54222.657976151 20 | 
19,-10.657894736842,705,19,0.16264330422878,0.045774164577946,57176.750576973 21 | 20,-1.3809523809524,718,21,0.23168902504444,0.079540585922077,60134.317682028 22 | 21,-0.6551724137931,936,29,0.26441657495499,0.048808562807739,63097.577541113 23 | 22,-8.8409090909091,835,22,0.28615098077059,0.048952121356502,66048.6204772 24 | 23,-8.7,765,20,0.3989036950469,0.048634952548891,69002.973978281 25 | 24,-4.125,580,16,0.4231478330493,0.046354526635259,71961.710373402 26 | 25,-5.5,612,17,0.44248150074482,0.053494612276554,74946.282803059 27 | 26,-5.452380952381,759,21,0.45483417850733,0.045858541212976,77999.638302326 28 | 27,-7.4411764705882,627,17,0.41578766536713,0.041342475254089,81190.814685345 29 | 28,-7.2,595,15,0.42314593607187,0.039326364312321,84337.967757225 30 | 29,-12.2,860,25,0.47111334341764,0.040940447449684,87465.261223078 31 | 30,-4.9285714285714,528,14,0.47050801891088,0.042695259518921,92498.191892147 32 | 31,-12.133333333333,524,15,0.45293545377254,0.04539662534371,95936.109339952 33 | 32,-12.875,427,12,0.45350726288557,0.038790578894317,98887.751207113 34 | 33,-11.142857142857,759,21,0.44122585648298,0.038887200295925,101841.51401019 35 | 34,-7,550,14,0.44989166086912,0.037162220109254,104789.37532616 36 | 35,-12.1,560,15,0.44096420383453,0.037667083889246,107743.42333221 37 | 36,-11.964285714286,545,14,0.41148587912321,0.040263948976994,110687.87245607 38 | 37,-9.0384615384615,453,13,0.37986992645264,0.040563675947487,113641.68067718 39 | 38,-9.8,568,15,0.38601289665699,0.036921709775925,116585.00527716 40 | 39,-7.4285714285714,521,14,0.38462983584404,0.041262718304992,119538.83113694 41 | 40,-7.6538461538462,491,13,0.35478970843554,0.036659536957741,122487.80203819 42 | 41,-8.75,401,10,0.36146842372417,0.0384386491552,125440.84892488 43 | 42,-8.85,383,10,0.3722738249898,0.03783643848449,128385.05406904 44 | 43,-3.2272727272727,413,11,0.35299725008011,0.035210303295404,131337.15963507 45 | 
44,-8.3571428571429,533,14,0.33448323339224,0.036093774173409,134284.86390018 46 | 45,-8.6153846153846,489,13,0.37744086462259,0.033056518763304,137272.45820832 47 | 46,-6.2692307692308,478,13,0.3672527294755,0.033928428951651,140232.80584526 48 | 47,-6.5,426,11,0.3415291916728,0.033964893162251,143187.39262938 49 | 48,-10.884615384615,478,13,0.32564502882957,0.036381606638432,146151.68410921 50 | 49,-7.65625,588,16,0.32519111514091,0.031207777202129,149129.0859623 51 | 50,-5.0357142857143,503,14,0.30523685032129,0.031699912421405,152100.95436621 52 | 51,-11.590909090909,410,11,0.28875471013784,0.0325832104648,155060.54105639 53 | 52,-10.772727272727,397,11,0.27054508757591,0.035913828574121,158014.06233835 54 | 53,-8.6666666666667,355,9,0.26279150092602,0.03167043646425,160989.72913432 55 | 54,-12.727272727273,437,11,0.28409675872326,0.031264732986689,163942.92208934 56 | 55,-10.722222222222,332,9,0.28172516745329,0.036271000921726,166903.69907832 57 | 56,-9.5555555555556,348,9,0.28384761965275,0.030679102502763,169867.21692038 58 | 57,-8.1666666666667,429,12,0.28735686922073,0.036207194432616,172831.57644558 59 | 58,-11.181818181818,415,11,0.29033307236433,0.033343236750923,175790.21458626 60 | 59,-14.590909090909,375,11,0.30354784888029,0.036958446115255,178745.57849836 61 | 60,-7.35,415,10,0.2884727704525,0.033673165328801,181704.80793929 62 | 61,-9.8461538461538,523,13,0.29265542846918,0.032939915321767,184672.56316423 63 | 62,-11.384615384615,515,13,0.27368097442389,0.033678608834744,187634.86896515 64 | 63,-10.791666666667,464,12,0.25689987742901,0.029657803095877,190609.77085805 65 | 64,-9.9583333333333,477,12,0.24411515450478,0.033804434772581,193575.23226714 66 | 65,-9,444,12,0.23694155526161,0.031689680457115,196543.12847924 67 | 66,-12.590909090909,430,11,0.24975286895037,0.031562295537442,199508.03181028 68 | 67,-11.5,467,13,0.26802344965935,0.030717463061213,202485.6055882 69 | 68,-11.5,396,10,0.27379069811106,0.03399647770822,205457.95847321 70 | 
69,-7.4583333333333,462,12,0.27509512645006,0.030351248666644,208423.81562114 71 | 70,-3,382,11,0.27350761300325,0.033080709680915,211385.97344613 72 | 71,-10.4,378,10,0.26220123666525,0.030410955883563,214351.19829679 73 | 72,-12.291666666667,429,12,0.28032770246267,0.029351569324732,217308.98916483 74 | 73,-8.0625,331,8,0.28249229198694,0.033711596421897,221972.7158649 75 | 74,-9.15,368,10,0.29987473875284,0.031458505049348,224964.42784786 76 | 75,-10.722222222222,332,9,0.30762102860212,0.030753311939538,227937.27384114 77 | 76,-11.181818181818,419,11,0.30988254141808,0.029774959594011,230895.05806398 78 | 77,-5.4444444444444,324,9,0.29618969237804,0.034235284376889,233852.17733288 79 | 78,-13,423,11,0.27776360011101,0.034624876495451,236795.76119375 80 | 79,-9.3888888888889,339,9,0.2646429989934,0.03197483446449,239746.05166459 81 | 80,-12.95,356,10,0.26202550524473,0.028922409027815,242697.99571753 82 | 81,-13.961538461538,452,13,0.25358505940437,0.030119795572013,245649.24792051 83 | 82,-12.5,398,11,0.2351166793704,0.033767471916974,249845.94104743 84 | 83,-12.538461538462,471,13,0.24164531832933,0.029401979405433,252805.92880058 85 | 84,-11.833333333333,335,9,0.24289074194431,0.026997715607285,255753.03083158 86 | 85,-13.708333333333,449,12,0.23900094258785,0.030221252571791,258703.89991164 87 | 86,-12.388888888889,339,9,0.2479863730073,0.029699892941862,261655.20484948 88 | 87,-10.791666666667,474,12,0.26849582207203,0.031834553465247,264607.36318159 89 | 88,-12,351,9,0.26750654709339,0.034056123487651,267564.47844744 90 | 89,-11.666666666667,420,12,0.25139537072182,0.032396519025788,270516.86856771 91 | 90,-10.5,335,9,0.28289089840651,0.033902511775494,273472.08324647 92 | 91,-10.85,385,10,0.27831519240141,0.030725085824728,276433.87832451 93 | 92,-6.55,374,10,0.28301594740152,0.033748467750847,279379.32505131 94 | 93,-13.6,359,10,0.28887090998888,0.032397522479296,282328.62156129 95 | 
94,-9.1818181818182,421,11,0.2910089418292,0.034031861945987,285282.38619661 96 | 95,-13.277777777778,338,9,0.29555907207727,0.031591658495367,288230.86623335 97 | 96,-12.318181818182,401,11,0.29896997374296,0.032282556593418,291176.53364658 98 | 97,-12.958333333333,433,12,0.30080125010014,0.032803089857101,294131.79124165 99 | 98,-12,365,9,0.29047444790602,0.032203946605325,297084.02317357 100 | 99,-11.3,356,10,0.28700837224722,0.032082269743085,300040.07620072 101 | -------------------------------------------------------------------------------- /plots/Pong2Player05p_history_B.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-1.1904761904762,566,21,-0.016031626001,0.019111451271921,0 3 | 2,-0.78947368421053,544,19,-0.015928619652987,0.021086706319824,2521.3481161594 4 | 3,-10.5,746,24,0.017985911607742,0.023174186084419,5990.4787950516 5 | 4,-5.3793103448276,817,29,0.035138628304005,0.021087123163044,9408.9113001823 6 | 5,-19.895833333333,554,24,0.016244427502155,0.020806871149689,12723.580034971 7 | 6,-2.2368421052632,563,19,0.030516146123409,0.018756742022932,16028.401235104 8 | 7,-14.131578947368,552,19,0.061523339450359,0.031348906546831,19352.323368073 9 | 8,-20.55625,1759,80,0.10061441165209,0.026641976721585,22690.247012377 10 | 9,-2.1590909090909,563,22,0.072689354002476,0.043322645559907,26035.510334253 11 | 10,-2.4318181818182,727,22,0.044146400034428,0.046073728732765,29387.56384325 12 | 11,-15.861111111111,525,18,0.040192230582237,0.051530308254063,32737.631895304 13 | 12,-15.394736842105,619,19,0.0775425760746,0.049443882986903,36082.453429222 14 | 13,-9.6914893617021,1535,47,0.13757249355316,0.054976292358711,39442.614874363 15 | 14,-5.4047619047619,1401,42,0.14144300734997,0.051685801447136,42395.264083385 16 | 15,-2.5119047619048,1384,42,0.17247566401958,0.05379497281462,45346.400170326 17 | 
16,-17.480392156863,1428,51,0.17486859929562,0.052109866876155,48300.30147934 18 | 17,-2.2619047619048,670,21,0.11819297283888,0.058114196747541,51265.125099182 19 | 18,-8.2291666666667,825,24,0.17078602439165,0.05748248129338,54222.657976151 20 | 19,-7.5789473684211,705,19,0.20490509063005,0.063521314557642,57176.750576973 21 | 20,-15.02380952381,718,21,0.20841271966696,0.064866852200124,60134.317682028 22 | 21,-14.931034482759,936,29,0.1713255674243,0.064945568051189,63097.577541113 23 | 22,-9.7272727272727,835,22,0.22036751252413,0.058795872772112,66048.6204772 24 | 23,-9.825,765,20,0.26435204058886,0.053478279015981,69002.973978281 25 | 24,-13.6875,580,16,0.28194195884466,0.055717939574271,71961.710373402 26 | 25,-11.852941176471,612,17,0.31042910403013,0.051862246179953,74946.282803059 27 | 26,-12.380952380952,759,21,0.30943595552444,0.047173657760024,77999.638302326 28 | 27,-10.441176470588,627,17,0.323223297894,0.046693474255502,81190.814685345 29 | 28,-11.4,595,15,0.35061695456505,0.041337524190545,84337.967757225 30 | 29,-4.82,860,25,0.32666733783484,0.041585235372186,87465.261223078 31 | 30,-13.071428571429,528,14,0.27311532890797,0.043553937409073,92498.191892147 32 | 31,-5.3333333333333,524,15,0.24466985583305,0.046891882263124,95936.109339952 33 | 32,-4.125,427,12,0.27309661978483,0.044171565037221,98887.751207113 34 | 33,-6.4285714285714,759,21,0.33880872291327,0.041125190071762,101841.51401019 35 | 34,-11.607142857143,550,14,0.29666004246473,0.04266371575743,104789.37532616 36 | 35,-6.1,560,15,0.32505999314785,0.039901426881552,107743.42333221 37 | 36,-6.6071428571429,545,14,0.34931463354826,0.036667717844248,110687.87245607 38 | 37,-8,453,13,0.34431127136946,0.039972880072892,113641.68067718 39 | 38,-9,568,15,0.3049511371851,0.0383997557275,116585.00527716 40 | 39,-10.642857142857,521,14,0.30735584205389,0.037219845436513,119538.83113694 41 | 40,-9.9615384615385,491,13,0.31245256966352,0.039549522027373,122487.80203819 42 | 
41,-9.5,401,10,0.28530238294601,0.038102595917881,125440.84892488 43 | 42,-8.55,383,10,0.33918499720097,0.038491429306567,128385.05406904 44 | 43,-14.136363636364,413,11,0.34722890794277,0.036860883470625,131337.15963507 45 | 44,-10.392857142857,533,14,0.30450721997023,0.038401826992631,134284.86390018 46 | 45,-9.4230769230769,489,13,0.31083624172211,0.036519952371716,137272.45820832 47 | 46,-11.461538461538,478,13,0.31132104611397,0.036799871981144,140232.80584526 48 | 47,-11.818181818182,426,11,0.3102690179944,0.036717105448246,143187.39262938 49 | 48,-6.8461538461538,478,13,0.31750721180439,0.033276669070125,146151.68410921 50 | 49,-10.65625,588,16,0.34079497611523,0.035877283748239,149129.0859623 51 | 50,-12.75,503,14,0.35767333728075,0.032263810649514,152100.95436621 52 | 51,-6.1363636363636,410,11,0.35128286981583,0.035170908555388,155060.54105639 53 | 52,-7.0909090909091,397,11,0.34812305194139,0.033418237604201,158014.06233835 54 | 53,-9.6666666666667,355,9,0.34887997090816,0.035392617613077,160989.72913432 55 | 54,-5.6363636363636,437,11,0.32212809282541,0.035544243440032,163942.92208934 56 | 55,-7.3888888888889,332,9,0.31232571786642,0.034990294538438,166903.69907832 57 | 56,-8.3888888888889,348,9,0.33573294907808,0.036595604598522,169867.21692038 58 | 57,-9.6666666666667,429,12,0.32209365475178,0.031012426264584,172831.57644558 59 | 58,-7.2272727272727,415,11,0.32653800737858,0.034533048078418,175790.21458626 60 | 59,-1.9090909090909,375,11,0.31488067770004,0.031103349879384,178745.57849836 61 | 60,-12,415,10,0.29465030092001,0.034411353155971,181704.80793929 62 | 61,-9.5,523,13,0.28568968135118,0.030104446336627,184672.56316423 63 | 62,-7.1153846153846,515,13,0.27341676348448,0.032042900033295,187634.86896515 64 | 63,-7.7916666666667,464,12,0.30227653044462,0.03176537196897,190609.77085805 65 | 64,-8.3333333333333,477,12,0.31022936862707,0.032570629991591,193575.23226714 66 | 65,-9.125,444,12,0.29924517524242,0.033795852646232,196543.12847924 67 | 
66,-6.0454545454545,430,11,0.3207828720808,0.032654751021415,199508.03181028 68 | 67,-6.4230769230769,467,13,0.33412792432308,0.030902617977932,202485.6055882 69 | 68,-7.3,396,10,0.3365140491128,0.027126253835857,205457.95847321 70 | 69,-10.208333333333,462,12,0.31692304229736,0.029001407504082,208423.81562114 71 | 70,-13.772727272727,382,11,0.31260763651133,0.031518132895231,211385.97344613 72 | 71,-8.3,378,10,0.31329001963139,0.028670164838433,214351.19829679 73 | 72,-5.0416666666667,429,12,0.31667448115349,0.030446218073368,217308.98916483 74 | 73,-10.875,331,8,0.31584479349852,0.0295074458085,221972.7158649 75 | 74,-7.8,368,10,0.3359946603775,0.027101310186088,224964.42784786 76 | 75,-7.3888888888889,332,9,0.32408956503868,0.028624357804656,227937.27384114 77 | 76,-6.6818181818182,419,11,0.31864206528664,0.032640161991119,230895.05806398 78 | 77,-11.777777777778,324,9,0.31573203909397,0.034313679173589,233852.17733288 79 | 78,-5.2272727272727,423,11,0.30915669256449,0.033906562924385,236795.76119375 80 | 79,-9.0555555555556,339,9,0.30403732931614,0.033609865434468,239746.05166459 81 | 80,-4.7,356,10,0.32141356164217,0.032473127380013,242697.99571753 82 | 81,-3.2307692307692,452,13,0.32835628992319,0.030475623458624,245649.24792051 83 | 82,-5.2727272727273,398,11,0.32665121507645,0.031192590117455,249845.94104743 84 | 83,-5.0384615384615,471,13,0.33629900109768,0.029393029239029,252805.92880058 85 | 84,-5.5,335,9,0.32858689790964,0.03369222028926,255753.03083158 86 | 85,-3.7083333333333,449,12,0.31140047758818,0.030493652947247,258703.89991164 87 | 86,-6.2222222222222,339,9,0.32676294052601,0.033686189174652,261655.20484948 88 | 87,-8.0416666666667,474,12,0.32105655437708,0.031828700378537,264607.36318159 89 | 88,-6.5,351,9,0.30024907886982,0.035195446297526,267564.47844744 90 | 89,-5.6666666666667,420,12,0.31623108011484,0.032271535798907,270516.86856771 91 | 90,-7.3333333333333,335,9,0.31568478637934,0.03311902026087,273472.08324647 92 | 
91,-6.8,385,10,0.30999838203192,0.036625040695071,276433.87832451 93 | 92,-11.5,374,10,0.33208245170116,0.032671007275581,279379.32505131 94 | 93,-4.15,359,10,0.31672998613119,0.033254537403584,282328.62156129 95 | 94,-8.5,421,11,0.34280806851387,0.033647121354938,285282.38619661 96 | 95,-4.2777777777778,338,9,0.35397489124537,0.034459251075983,288230.86623335 97 | 96,-5.5,401,11,0.36022583991289,0.032835136532784,291176.53364658 98 | 97,-4.5833333333333,433,12,0.34793751347065,0.033339282497764,294131.79124165 99 | 98,-6.6666666666667,365,9,0.34626139050722,0.03423603951931,297084.02317357 100 | 99,-6.35,356,10,0.33870465636253,0.034306661583483,300040.07620072 101 | -------------------------------------------------------------------------------- /plots/Pong2Player05p_meanq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_meanq.png -------------------------------------------------------------------------------- /plots/Pong2Player05p_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player05p_tderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_tderror.png -------------------------------------------------------------------------------- /plots/Pong2Player05p_time.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player05p_time.png -------------------------------------------------------------------------------- /plots/Pong2Player075.csv: -------------------------------------------------------------------------------- 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB 2 | 49;1;20;1750;6;10620;-5,5;-5 3 | 49;2;2;1986;1;5032;-0,75;-1 4 | 49;3;28;1693;8;12160;-7;-7 5 | 49;4;44;1227;11;25080;-9,25;-10 6 | 49;5;13;1899;6;6340;-5;-5,5 7 | 49;6;24;1617;10;14288;-9,25;-8,25 8 | 49;7;9;1745;4;11664;-3,5;-3,5 9 | 49;8;63;1247;18;22684;-16,25;-15,25 10 | 49;9;11;1485;5;19376;-4,5;-4,25 11 | 49;10;34;1494;7;17680;-6;-6,25 12 | -------------------------------------------------------------------------------- /plots/Pong2Player075_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_average_reward.png -------------------------------------------------------------------------------- /plots/Pong2Player075_episode_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player075_history_A.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-28.440476190476,674,21,-0.078805467367172,0.039509000293911,0 3 | 2,-23.939189189189,991,37,-0.19247976750135,0.049804273158312,2115.9324629307 4 | 
3,-24.796875,891,32,-0.3266643422842,0.045056506931782,4963.2593638897 5 | 4,-27.617647058824,579,17,-0.37178059887886,0.05166561716795,7856.8729798794 6 | 5,-17.34375,544,24,-0.42195177257061,0.038914656162262,10780.833153009 7 | 6,-25.392857142857,984,35,-0.44268959724903,0.037266859948635,13695.362179756 8 | 7,-27.629032258065,952,31,-0.46903250986338,0.043649382352829,16637.670908928 9 | 8,-22.737704918033,1659,61,-0.61683112418652,0.041099587380886,19597.94380188 10 | 9,-22.085820895522,1505,67,-0.65997887200117,0.039472180187702,22610.275089979 11 | 10,-22.977272727273,1054,44,-0.72120541602373,0.042340319633484,26011.902555943 12 | 11,-21.684782608696,510,23,-0.76017710638046,0.034048043489456,29419.893827915 13 | 12,-28.8125,550,16,-0.75746470314264,0.035089691400528,32803.928194046 14 | 13,-29.826923076923,487,13,-0.80500614327192,0.043355384588242,36173.552186966 15 | 14,-23.808823529412,515,17,-0.81714948797226,0.035739391088486,39532.436095715 16 | 15,-30.903846153846,475,13,-0.77060514008999,0.040142685949802,42889.226594687 17 | 16,-29.979166666667,447,12,-0.7682787065506,0.043726075947285,45901.21411252 18 | 17,-31.727272727273,421,11,-0.70375599229336,0.044094051361084,48914.93513751 19 | 18,-31.681818181818,410,11,-0.67170395106077,0.042927358448505,51929.802863598 20 | 19,-33.727272727273,429,11,-0.67993818211555,0.039811842501163,54943.719038486 21 | 20,-27.910714285714,440,14,-0.58732054197788,0.03741382163763,57958.526197433 22 | 21,-33.1,397,10,-0.55022555148602,0.040578206688166,60973.126054525 23 | 22,-31.65,388,10,-0.57079403340816,0.039794300436974,63989.139203548 24 | 23,-31.045454545455,425,11,-0.5507935795188,0.040159463077784,67004.544019699 25 | 24,-30.431818181818,401,11,-0.51569227671623,0.037448912441731,70036.117438555 26 | 25,-32.25,392,10,-0.52514779758453,0.039347346395254,73039.492605686 27 | 26,-32.25,110,3,-0.47071734595299,0.036341747313738,76062.590397596 28 | 
27,-32.3,383,10,-0.4714185205698,0.036542771577835,79094.062027454 29 | 28,-32.111111111111,344,9,-0.46518271881342,0.033119259476662,82460.400069475 30 | 29,-33.5,393,10,-0.46919112837315,0.034872973203659,85488.991984367 31 | 30,-32.725,372,10,-0.42546955668926,0.035309938177466,88513.200140238 32 | 31,-30.35,355,10,-0.39419751614332,0.037728601813316,91537.195670128 33 | 32,-32.175,367,10,-0.35395925176144,0.034827574513853,94553.798403025 34 | 33,-31.416666666667,324,9,-0.34443300080299,0.033128284499049,97573.81096077 35 | 34,-31.1,362,10,-0.33986711114645,0.032994185395539,100589.43251872 36 | 35,-31.944444444444,355,9,-0.34490878891945,0.033036506228149,103614.39954591 37 | 36,-30.25,358,10,-0.32507938158512,0.03493745534867,106632.72146487 38 | 37,-31,299,8,-0.29529939067364,0.03263739605248,109651.67803073 39 | 38,-31.111111111111,322,9,-0.30571579384804,0.033988059701398,112670.15548062 40 | 39,-30.666666666667,321,9,-0.31798388397694,0.037830376267433,115692.09249473 41 | 40,-29.75,64,1,-0.31833603322506,0.036371737275273,118718.38890862 42 | 41,-26.583333333333,200,6,-0.29567375910282,0.034206227421761,121740.25946045 43 | 42,-28.725,342,10,-0.28755002248287,0.034084396282211,125090.61474347 44 | 43,-32.1875,310,8,-0.27520984494686,0.034307959004771,128114.22638273 45 | 44,-30.75,248,6,-0.28520494115353,0.033417053673416,131138.29339767 46 | 45,-28.722222222222,304,9,-0.24047256863117,0.032669705805369,134158.79221582 47 | 46,-32.964285714286,281,7,-0.23651483201981,0.034113161490299,137181.63822651 48 | 47,-30.75,304,8,-0.22929638022184,0.035676180838374,140209.12533069 49 | 48,-32.027777777778,330,9,-0.22382859092951,0.033306384172756,143231.82737756 50 | 49,-31.96875,302,8,-0.22882852852345,0.035149923972203,146256.2114768 51 | 50,-31.875,311,8,-0.19476912009716,0.033764365680981,149283.37731457 52 | 51,0,26,0,-0.20900189971924,0.036285256405827,152311.60005665 53 | 52,-30.964285714286,259,7,-0.21334506094456,0.033617449524812,155335.61908579 54 | 
53,-32.15625,306,8,-0.21985206311941,0.035054566949606,158679.64606357 55 | 54,0,10,0,-0.21830126535892,0.035162898614537,161709.62453771 56 | 55,0,1,0,-0.21426600271463,0.034257032785332,164731.5665195 57 | 56,-31.178571428571,258,7,-0.22991574454308,0.033552558975993,168073.77445865 58 | 57,0,8,0,-0.22385918140411,0.035624685370596,171421.55948472 59 | 58,0,32,0,-0.2307907345295,0.033521595358849,174889.82973671 60 | 59,0,30,0,-0.21952018594742,0.037359246272594,178231.51118279 61 | 60,0,13,0,-0.21459531658888,0.033596971890889,181576.5636518 62 | 61,-30.5,117,3,-0.22054652988911,0.034634251405951,184920.482651 63 | 62,-30.5,153,4,-0.21846684992313,0.035553850755794,188261.08853292 64 | 63,-26.75,41,1,-0.21431094408035,0.03240073265566,191643.86233997 65 | 64,0,2,0,-0.21656257855892,0.034314641881967,195033.02419901 66 | 65,-31.333333333333,220,6,-0.21119965481758,0.03203012568265,198387.73912501 67 | 66,-27.75,86,2,-0.19464588975906,0.03143425309821,201733.42537999 68 | 67,-35.25,57,1,-0.18309987294674,0.031151129993377,205141.69607902 69 | 68,0,20,0,-0.17650469058752,0.030899880289566,208501.50281811 70 | 69,0,29,0,-0.17109624695778,0.031047332327347,211865.68041515 71 | 70,-31.15,201,5,-0.168033218503,0.031188633478247,215208.4411881 72 | 71,-29.916666666667,312,9,-0.16163378447294,0.031242613693699,218551.90005112 73 | 72,0,21,0,-0.18294815135002,0.030757001365069,221943.50013113 74 | 73,0,9,0,-0.20314860373735,0.031849301447161,225461.31916332 75 | 74,-28.75,57,1,-0.21980968916416,0.032152000057278,228792.55967522 76 | 75,-32.428571428571,273,7,-0.23677251183987,0.033057268942706,232123.80669713 77 | 76,0,4,0,-0.24064756345749,0.032449412482325,235467.32760644 78 | 77,0,13,0,-0.24053260362148,0.032613435472827,238480.73442841 79 | 78,0,5,0,-0.26564919984341,0.034383471850306,241808.53344035 80 | 79,0,17,0,-0.25765567457676,0.03478965630359,245132.98487949 81 | 80,0,1,0,-0.23240065824986,0.033272572978283,248461.24440837 82 | 
81,0,28,0,-0.24689063930511,0.036688380377833,251784.86009049 83 | 82,0,4,0,-0.25523908925056,0.036049961570418,255111.13907838 84 | 83,-32.25,282,7,-0.24346972894669,0.035403252773918,258439.50497341 85 | 84,0,13,0,-0.26288804399967,0.033645914101973,261762.70466757 86 | 85,-33,222,5,-0.27623062241077,0.033039603222162,265189.84182572 87 | 86,0,7,0,-0.298228297472,0.035824844000861,268527.62760568 88 | 87,0,26,0,-0.31707869124413,0.036494645655155,271539.21241379 89 | 88,0,26,0,-0.30207984232903,0.033548617534339,274869.65589881 90 | 89,0,2,0,-0.29758540093899,0.035752976356074,278201.52565455 91 | 90,-29.75,36,1,-0.29844088804722,0.040411054283381,281534.15359473 92 | 91,0,15,0,-0.2869536460638,0.035340740567073,284860.0808897 93 | 92,0,23,0,-0.29461841249466,0.036445504205301,288192.82416868 94 | 93,0,20,0,-0.28057670366764,0.036911166271195,291518.35870671 95 | 94,-32.75,65,1,-0.27155382359028,0.036631896108855,294841.96218395 96 | 95,0,3,0,-0.26560618460178,0.034903613431379,298169.07247496 97 | 96,0,22,0,-0.26107378935814,0.034363488769159,301519.17273879 98 | 97,0,20,0,-0.25742229640484,0.033933212923817,304858.42739272 99 | 98,0,13,0,-0.25535943281651,0.033768573452719,308351.24397063 100 | -------------------------------------------------------------------------------- /plots/Pong2Player075_history_B.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-26.22619047619,674,21,-0.070096668124199,0.023073135763407,0 3 | 2,-21.986486486486,991,37,-0.16645452481508,0.031430733263493,2115.9324629307 4 | 3,-23.4921875,891,32,-0.26210048544407,0.033524731636047,4963.2593638897 5 | 4,-28.176470588235,579,17,-0.3241598880887,0.040739152014256,7856.8729798794 6 | 5,-21.8125,544,24,-0.38103352844715,0.038006673932075,10780.833153009 7 | 6,-23.357142857143,984,35,-0.39804885518551,0.030070164382458,13695.362179756 8 | 
7,-25.943548387097,952,31,-0.42522195124626,0.039641461074352,16637.670908928 9 | 8,-24.827868852459,1659,61,-0.48663937526941,0.033724322736263,19597.94380188 10 | 9,-17.19776119403,1505,67,-0.58310771363974,0.039613855063915,22610.275089979 11 | 10,-18.386363636364,1054,44,-0.67385898274183,0.047257747024298,26011.902555943 12 | 11,-16.663043478261,510,23,-0.74345404291153,0.047774670898914,29419.893827915 13 | 12,-28.71875,550,16,-0.7510941234827,0.047621515154839,32803.928194046 14 | 13,-31.557692307692,487,13,-0.77566252315044,0.045721424460411,36173.552186966 15 | 14,-27.044117647059,515,17,-0.7251959322691,0.046316379964352,39532.436095715 16 | 15,-31.019230769231,475,13,-0.70267062968016,0.047811714947224,42889.226594687 17 | 16,-30.395833333333,447,12,-0.72241412043571,0.051598012924194,45901.21411252 18 | 17,-31.909090909091,421,11,-0.64478324902058,0.042378659039736,48914.93513751 19 | 18,-30.522727272727,410,11,-0.59752578818798,0.047355640590191,51929.802863598 20 | 19,-32.931818181818,429,11,-0.59042614203691,0.048118498712778,54943.719038486 21 | 20,-24.964285714286,440,14,-0.61142364943027,0.04725267457962,57958.526197433 22 | 21,-32,397,10,-0.5925022212863,0.046299857914448,60973.126054525 23 | 22,-29.95,388,10,-0.58906208235025,0.047253952890635,63989.139203548 24 | 23,-32.113636363636,425,11,-0.56366939502954,0.042660036683083,67004.544019699 25 | 24,-31.613636363636,401,11,-0.59884598815441,0.040162801504135,70036.117438555 26 | 25,-31.1,392,10,-0.56721539533138,0.041649486452341,73039.492605686 27 | 26,-30.75,110,3,-0.54824591767788,0.039677893638611,76062.590397596 28 | 27,-31.4,383,10,-0.53941313302517,0.039290070354939,79094.062027454 29 | 28,-32.055555555556,344,9,-0.52132974845171,0.041671369194984,82460.400069475 30 | 29,-32.825,393,10,-0.50579402017593,0.040201214283705,85488.991984367 31 | 30,-32.2,372,10,-0.48678982871771,0.041224533766508,88513.200140238 32 | 31,-28.275,355,10,-0.39500003600121,0.036619587063789,91537.195670128 33 | 
32,-31.875,367,10,-0.34533633959293,0.040921632256359,94553.798403025 34 | 33,-29.833333333333,324,9,-0.36864508163929,0.037416964419186,97573.81096077 35 | 34,-29.45,362,10,-0.36766059172153,0.038218563757837,100589.43251872 36 | 35,-32.027777777778,355,9,-0.36577108734846,0.038333136588335,103614.39954591 37 | 36,-28.375,358,10,-0.35177277600765,0.040965191282332,106632.72146487 38 | 37,-30.03125,299,8,-0.34595155924559,0.036479432418942,109651.67803073 39 | 38,-29.555555555556,322,9,-0.30786217570305,0.039175052693114,112670.15548062 40 | 39,-28.638888888889,321,9,-0.3109705504775,0.039023913567886,115692.09249473 41 | 40,-31.5,64,1,-0.32269009959698,0.037012370828539,118718.38890862 42 | 41,-28.25,200,6,-0.29259986150265,0.039351981215179,121740.25946045 43 | 42,-26.05,342,10,-0.31786714351177,0.03637124568969,125090.61474347 44 | 43,-32.125,310,8,-0.31034964895248,0.037696102849673,128114.22638273 45 | 44,-32.25,248,6,-0.33015128362179,0.036158583816141,131138.29339767 46 | 45,-30,304,9,-0.35357726168633,0.03773136805743,134158.79221582 47 | 46,-32.785714285714,281,7,-0.36206952941418,0.036562707044184,137181.63822651 48 | 47,-32.03125,304,8,-0.3573007928133,0.036651544630527,140209.12533069 49 | 48,-30.777777777778,330,9,-0.36903064417839,0.039021208509803,143231.82737756 50 | 49,-30.59375,302,8,-0.36960311710835,0.034745419502258,146256.2114768 51 | 50,-31.78125,311,8,-0.35660752868652,0.036782532520592,149283.37731457 52 | 51,0,26,0,-0.35973270308971,0.039194688305259,152311.60005665 53 | 52,-31.535714285714,259,7,-0.32979233515263,0.039308880701661,155335.61908579 54 | 53,-33.03125,306,8,-0.30454251247644,0.038068914060481,158679.64606357 55 | 54,0,10,0,-0.3077388882637,0.03486764463596,161709.62453771 56 | 55,0,1,0,-0.287833101511,0.036615976681584,164731.5665195 57 | 56,-30.071428571429,258,7,-0.28559452927113,0.038736656761728,168073.77445865 58 | 57,0,8,0,-0.28965576517582,0.03533292925451,171421.55948472 59 | 
58,0,32,0,-0.29404142677784,0.03537861924246,174889.82973671 60 | 59,0,30,0,-0.27960905748606,0.035781114966609,178231.51118279 61 | 60,0,13,0,-0.26389364635944,0.035452972461237,181576.5636518 62 | 61,-30.75,117,3,-0.25413168799877,0.036756916988408,184920.482651 63 | 62,-32.0625,153,4,-0.24930246031284,0.03570568712987,188261.08853292 64 | 63,-29.25,41,1,-0.2609234815836,0.034284014912322,191643.86233997 65 | 64,0,2,0,-0.26833349788189,0.036336692431942,195033.02419901 66 | 65,-31.083333333333,220,6,-0.2487211471796,0.033628752228804,198387.73912501 67 | 66,-24.75,86,2,-0.2333458237648,0.032874883798882,201733.42537999 68 | 67,-34.75,57,1,-0.22106080913544,0.032228405235335,205141.69607902 69 | 68,0,20,0,-0.2113011238575,0.032315241830423,208501.50281811 70 | 69,0,29,0,-0.2016722881794,0.032087664677761,211865.68041515 71 | 70,-30.1,201,5,-0.19333104896545,0.031947336994577,215208.4411881 72 | 71,-27.638888888889,312,9,-0.18695436799526,0.031283404531889,218551.90005112 73 | 72,0,21,0,-0.22919175350666,0.031978903968818,221943.50013113 74 | 73,0,9,0,-0.22746892988682,0.032814852227923,225461.31916332 75 | 74,-30.75,57,1,-0.24791103869677,0.035097305248724,228792.55967522 76 | 75,-32.071428571429,273,7,-0.24014590930939,0.035565375929698,232123.80669713 77 | 76,0,4,0,-0.24898050534725,0.034651560001832,235467.32760644 78 | 77,0,13,0,-0.27807631742954,0.036324908265844,238480.73442841 79 | 78,0,5,0,-0.28194893968105,0.039241063005291,241808.53344035 80 | 79,0,17,0,-0.29142114639282,0.038866422567284,245132.98487949 81 | 80,0,1,0,-0.30010227823257,0.038893028046004,248461.24440837 82 | 81,0,28,0,-0.30566997385025,0.038882899645716,251784.86009049 83 | 82,0,4,0,-0.2768429749012,0.039276440129615,255111.13907838 84 | 83,-31,282,7,-0.28723658442497,0.037448742699809,258439.50497341 85 | 84,0,13,0,-0.30693704926968,0.038763760032132,261762.70466757 86 | 85,-32.1,222,5,-0.28970084834099,0.037739263318479,265189.84182572 87 | 
86,0,7,0,-0.30202293968201,0.037263768468052,268527.62760568 88 | 87,0,26,0,-0.30429276943207,0.038816785745323,271539.21241379 89 | 88,0,26,0,-0.29531943643093,0.037614325005561,274869.65589881 90 | 89,0,2,0,-0.28496080207825,0.039185200965963,278201.52565455 91 | 90,-31.5,36,1,-0.28107423722744,0.039146922259592,281534.15359473 92 | 91,0,15,0,-0.26688726651669,0.040104887501802,284860.0808897 93 | 92,0,23,0,-0.28069443309307,0.037644153595902,288192.82416868 94 | 93,0,20,0,-0.30239212501049,0.037012866429053,291518.35870671 95 | 94,-33.75,65,1,-0.29299341022968,0.037025410333648,294841.96218395 96 | 95,0,3,0,-0.28267125856876,0.035941258675884,298169.07247496 97 | 96,0,22,0,-0.27609798681736,0.03508983820118,301519.17273879 98 | 97,0,20,0,-0.27340524804592,0.034850677061826,304858.42739272 99 | 98,0,13,0,-0.27220576739311,0.034442967168987,308351.24397063 100 | -------------------------------------------------------------------------------- /plots/Pong2Player075_meanq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_meanq.png -------------------------------------------------------------------------------- /plots/Pong2Player075_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player075_tderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_tderror.png 
-------------------------------------------------------------------------------- /plots/Pong2Player075_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075_time.png -------------------------------------------------------------------------------- /plots/Pong2Player075p.csv: -------------------------------------------------------------------------------- 1 | training Epoch;Seed;WallBounces;SideBounce;Points;ServingTime;RewardA;RewardB 2 | 49;1;234;314;40;19048;-6,75;-3,25 3 | 49;2;247;285;41;20716;-4,25;-6 4 | 49;3;270;322;41;16028;-4,25;-6 5 | 49;4;276;337;41;20052;-6;-4,25 6 | 49;5;251;269;40;16840;-6,75;-3,25 7 | 49;6;250;350;39;19668;-2,25;-7,5 8 | 49;7;230;286;37;14700;-9;-0,25 9 | 49;8;232;259;38;22368;-1,25;-8,25 10 | 49;9;207;246;31;29452;5,75;-13,5 11 | 49;10;191;216;32;14024;4,75;-12,75 12 | -------------------------------------------------------------------------------- /plots/Pong2Player075p_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_average_reward.png -------------------------------------------------------------------------------- /plots/Pong2Player075p_episode_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player075p_history_A.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD 
Error,Seconds 2 | 1,15.75,1807,86,0.024660635497421,0.030749857582152,0 3 | 2,15.75,1808,86,0.051130523443222,0.030327398702502,3080.0637950897 4 | 3,10.659090909091,556,22,0.068492970630527,0.02893841022253,6112.7202789783 5 | 4,8.0729166666667,591,24,0.088593688845634,0.030321074590087,9164.36353302 6 | 5,10.583333333333,566,21,0.10387785038352,0.029432385802269,12226.163316011 7 | 6,-5.5,568,18,0.10838790327311,0.031264565363526,15295.448102951 8 | 7,-7.9375,542,16,0.11674013504386,0.029484617933631,18352.79207778 9 | 8,12.166666666667,599,24,0.16122512874007,0.052343306601048,21446.514513969 10 | 9,10.806603773585,1267,53,0.2247808945179,0.037818958848715,24605.232800007 11 | 10,15.053797468354,1720,79,0.24170792597532,0.044391799166799,27684.980213881 12 | 11,-1.5326086956522,1590,46,0.24667657256126,0.050101535048336,30776.000858068 13 | 12,-9.7083333333333,1478,48,0.22868193393946,0.053942181050777,33859.155449152 14 | 13,-4.7727272727273,1436,44,0.39942449390888,0.063749760290608,36948.059296131 15 | 14,-11.257575757576,1106,33,0.34375342148542,0.059864736787975,40032.171612024 16 | 15,7.202380952381,1264,42,0.32036609226465,0.069277341924608,43118.681558847 17 | 16,-13.28125,775,24,0.29566724926233,0.063349225189537,46197.414586782 18 | 17,0.94166666666667,1037,30,0.41377734726667,0.071681627377868,49284.942941904 19 | 18,5.75,748,24,0.33886451756954,0.068497176675126,52369.253451109 20 | 19,-8.71875,842,24,0.45781510215998,0.058275318542495,55457.752614021 21 | 20,-5.8676470588235,672,17,0.53462120777369,0.057810675021261,58542.084111214 22 | 21,-6.8421052631579,716,19,0.60764492100477,0.064270095337182,61634.21347928 23 | 22,-5.8571428571429,732,21,0.64261210149527,0.068129319958389,64716.454583406 24 | 23,-1.7738095238095,787,21,0.61844365131855,0.057609518219717,67804.824807405 25 | 24,-3.1071428571429,556,14,0.63900305151939,0.056879258744419,70885.788652182 26 | 25,1.1166666666667,534,15,0.6976574755311,0.049259904682636,73973.405339479 27 | 
26,-1.5192307692308,468,13,0.64544979292154,0.04733891300112,77058.188096285 28 | 27,-3.75,554,14,0.6684123942256,0.04495135641098,80147.682602406 29 | 28,-2.0769230769231,507,13,0.66438281059265,0.047691881015897,83233.454415321 30 | 29,-5.9464285714286,519,14,0.6938461330533,0.047677454277873,86321.545918226 31 | 30,-5.625,540,14,0.62893603372574,0.048527343899012,89405.741168261 32 | 31,-5.8269230769231,468,13,0.6258493257165,0.042918682560325,92495.569252253 33 | 32,-5.96875,603,16,0.60775027537346,0.041709499500692,95580.412363291 34 | 33,-0.8125,437,12,0.56024558949471,0.043020392000675,98671.039297342 35 | 34,-6.6666666666667,581,15,0.57606478148699,0.037179504457861,101756.21317363 36 | 35,-6.2291666666667,432,12,0.54789750576019,0.039359708514065,104845.51876974 37 | 36,-8.75,453,12,0.4921031267643,0.042268126763403,107931.81754065 38 | 37,-4.0166666666667,590,15,0.48646099513769,0.042580844625831,111023.61872482 39 | 38,-8,516,15,0.49522505527735,0.042653404973447,114106.57322884 40 | 39,-1.3846153846154,507,13,0.48793794256449,0.04224406632781,117195.03930712 41 | 40,-5.4318181818182,423,11,0.48889998710155,0.040071641802788,120281.06809187 42 | 41,-4.9230769230769,505,13,0.48055751681328,0.039680305801332,123372.07204795 43 | 42,-6.0208333333333,482,12,0.45747923773527,0.042458710625768,126458.34766293 44 | 43,-1.9,555,15,0.45183955562115,0.042399740766734,129550.40173292 45 | 44,-7.5,479,13,0.47835363066196,0.042559848606586,132637.18359709 46 | 45,-6.6875,455,12,0.44431625276804,0.040284845102578,135729.21975207 47 | 46,-8.9807692307692,486,13,0.45677413457632,0.038720687437803,138815.07875204 48 | 47,-4.825,396,10,0.45799841880798,0.036913332745433,141908.51582503 49 | 48,-7.5681818181818,393,11,0.43723845565319,0.042279472753406,144993.19221425 50 | 49,-1.5,559,15,0.44824945348501,0.039370908103883,148082.60764003 51 | 50,-6.2083333333333,456,12,0.45087397807837,0.03716516715847,151168.20122814 52 | 
51,-9.8,534,15,0.43702257186174,0.035734470188618,154257.148736 53 | 52,-8,492,13,0.44362791693211,0.03762533941865,157344.15754008 54 | 53,-5.25,459,12,0.45500042200089,0.036687616363168,160434.63341284 55 | 54,-5.3409090909091,413,11,0.4488413001895,0.034906475421041,163523.18911505 56 | 55,-6.4318181818182,436,11,0.45025093793869,0.036342834193259,166613.51492286 57 | 56,-8.0384615384615,461,13,0.46570703411102,0.03995348854363,169697.67845893 58 | 57,-7.6346153846154,501,13,0.43338271135092,0.039611381992698,172786.25870085 59 | 58,-7.7916666666667,437,12,0.44928603261709,0.040222072809935,175872.73005581 60 | 59,-5.9583333333333,463,12,0.46624615210295,0.040560769751668,178964.54942298 61 | 60,-9.9090909090909,413,11,0.4442690333128,0.038295251406729,182062.75369263 62 | 61,-3.8125,428,12,0.43601207023859,0.035158951945603,185158.80821776 63 | 62,-7.6923076923077,475,13,0.40569348537922,0.035799814514816,188250.66081071 64 | 63,-6.6363636363636,431,11,0.42681465303898,0.042572220876813,191351.09989786 65 | 64,-5,400,11,0.41745218658447,0.0398869420439,194434.95675683 66 | 65,-6,402,10,0.42086963629723,0.0396958931759,197527.92675591 67 | 66,-8.425,387,10,0.42330500152707,0.038195345409214,200615.96345854 68 | 67,-4.7727272727273,421,11,0.40715691006184,0.036385320782661,203720.71026158 69 | 68,-8.5833333333333,443,12,0.40344660624862,0.039129270635545,206804.60639071 70 | 69,-8.7,373,10,0.40128754279017,0.036490554556251,209908.93703961 71 | 70,-8.6666666666667,443,12,0.40851074105501,0.038445966929197,213017.6966126 72 | 71,-2.7272727272727,413,11,0.41067783224583,0.0347543014884,216075.20573568 73 | 72,-4.7708333333333,441,12,0.40127306774259,0.036854844558984,219183.13803959 74 | 73,-5.8958333333333,453,12,0.40975332427025,0.036320907175541,222279.96047664 75 | 74,-9.4090909090909,400,11,0.39718827790022,0.035727003157139,225375.15562463 76 | 75,-12.6875,408,12,0.39383558106422,0.036454273343086,228467.32659149 77 | 
76,-3.9772727272727,409,11,0.40533371210098,0.037318908035755,231551.33051062 78 | 77,-4.6136363636364,429,11,0.39889466708899,0.036922183424234,234666.38622546 79 | 78,-8.8461538461538,475,13,0.39915732344985,0.036689936708659,237766.57640219 80 | 79,-7,445,12,0.38725353875756,0.032528960898519,240870.14482617 81 | 80,-8.75,426,11,0.39519296753407,0.034220179125667,243970.74544001 82 | 81,-5.2045454545455,388,11,0.39592769497633,0.03491831561923,247072.61222601 83 | 82,-7.1818181818182,376,11,0.40324878278375,0.035286599181592,250179.29944086 84 | 83,-9.875,379,10,0.38703223651648,0.032568644538522,253264.30240369 85 | 84,-5.9545454545455,416,11,0.40559637379646,0.037186695754528,256350.25635958 86 | 85,-6,449,11,0.40885295468569,0.035535577245057,259451.20921779 87 | 86,-9.2083333333333,428,12,0.40436582121253,0.033965634226799,262556.47954965 88 | 87,-5.9772727272727,383,11,0.41843406271935,0.033925799757242,265645.21741557 89 | 88,-5.6363636363636,432,11,0.40849586594105,0.038514320969582,268736.37077069 90 | 89,-6.9545454545455,416,11,0.40571232023835,0.036798851117492,271826.71463871 91 | 90,-9.15,361,10,0.39028529846668,0.037728272676468,274916.96188283 92 | 91,-6.125,430,12,0.40744237133861,0.037438799366355,278015.84529686 93 | 92,-10.159090909091,400,11,0.39765123099089,0.037313489928842,281105.74615073 94 | 93,-4.1,385,10,0.41264896920323,0.037302253067493,284197.1706388 95 | 94,-8.6111111111111,362,9,0.41678735074401,0.034228690594435,287305.07751155 96 | 95,-10.361111111111,322,9,0.42678021454811,0.033199661089806,290404.15810561 97 | 96,-5.6666666666667,355,9,0.40362212339044,0.038675586894155,293492.53668165 98 | 97,-8.1818181818182,404,11,0.40906615021825,0.033120844051242,296592.94970655 99 | 98,-5.225,380,10,0.41440004438162,0.036935546435416,299686.30462861 100 | 99,-5.875,374,10,0.41312079289556,0.036710994064808,302787.99873877 101 | -------------------------------------------------------------------------------- 
/plots/Pong2Player075p_meanq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_meanq.png -------------------------------------------------------------------------------- /plots/Pong2Player075p_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player075p_tderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_tderror.png -------------------------------------------------------------------------------- /plots/Pong2Player075p_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player075p_time.png -------------------------------------------------------------------------------- /plots/Pong2Player0_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_average_reward.png -------------------------------------------------------------------------------- /plots/Pong2Player0_episode_counts.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player0_history_A.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-11.227272727273,251,22,-0.025585235670209,0.025499564410187,0 3 | 2,-3.08,77,25,-0.10267928412557,0.026237351149321,2138.1417090893 4 | 3,-21,378,18,-0.1350394269228,0.018906522810459,5032.016119957 5 | 4,-16.333333333333,588,36,-0.17244348245859,0.027499322533607,7944.8819160461 6 | 5,-17.4,609,35,-0.17134431993961,0.03071308735013,10893.190360785 7 | 6,-18,630,35,-0.18266862034798,0.030370710909367,13839.044056892 8 | 7,-10.333333333333,217,21,-0.17723900139332,0.018381405025721,16811.021166086 9 | 8,-11.6,232,20,-0.17556799054146,0.022607351928949,19795.227915049 10 | 9,-12.03125,386,32,-0.21307243537903,0.027679172545671,22806.841446161 11 | 10,-13.51724137931,404,29,-0.23035820114613,0.02044185616076,25822.181778193 12 | 11,-6.875,165,24,-0.26865710783005,0.028466097682714,28847.795817852 13 | 12,-18.125,290,16,-0.26660308158398,0.023737326353788,31865.068264961 14 | 13,-11.966666666667,362,30,-0.32671620607376,0.025554981783032,34888.49136591 15 | 14,-8.45,171,20,-0.35948745524883,0.027599836587906,37907.987119913 16 | 15,-19,417,21,-0.39233250629902,0.02771189391613,40934.394598007 17 | 16,-16.789473684211,332,19,-0.35827468526363,0.024274397015572,43948.930922031 18 | 17,-14.055555555556,269,18,-0.40537124884129,0.02935955581069,46971.943500996 19 | 18,-18.157894736842,346,19,-0.38306200611591,0.028565983921289,49988.333903074 20 | 19,-21,527,25,-0.37758827137947,0.02897119282186,53014.636921883 21 | 20,-21,510,24,-0.40028989064693,0.031559363260865,56030.686516047 22 | 
21,-21,382,18,-0.42341521680355,0.030187362790108,59056.352437973 23 | 22,-21,688,32,-0.48741152131557,0.032671946972609,62074.302101851 24 | 23,-14.928571428571,420,28,-0.44714593672752,0.034300659775734,65102.738009691 25 | 24,-21,420,20,-0.41901146733761,0.032433551505208,68122.853546858 26 | 25,-16.192307692308,421,26,-0.39868759739399,0.030124093748629,71147.998114824 27 | 26,-18.764705882353,324,17,-0.37142059743404,0.030927883055061,74168.781708956 28 | 27,-20.809523809524,450,21,-0.36811852920055,0.032222523305565,77202.353144884 29 | 28,-19.642857142857,284,14,-0.3046877477169,0.030882145626005,80223.19860363 30 | 29,-16.3125,262,16,-0.23653232705593,0.0310577664515,83246.392048597 31 | 30,-14.153846153846,185,13,-0.22916741073132,0.028503048017621,86263.41059041 32 | 31,-20,263,13,-0.1838299318552,0.028571169128641,89288.379914522 33 | 32,-15.083333333333,194,12,-0.18447684276104,0.028669490520842,92305.957981825 34 | 33,-20.9,221,10,-0.12901995027065,0.028296903408365,95331.062992573 35 | 34,-19.090909090909,217,11,-0.12079485702515,0.029319755812641,98348.517512321 36 | 35,-19.071428571429,272,14,-0.10410001826286,0.035293330463581,101371.28614831 37 | 36,-19.230769230769,269,13,-0.066544069170952,0.026355039540213,104388.2614243 38 | 37,-20.363636363636,226,11,-0.041784022212029,0.027937983275391,107410.07730746 39 | 38,-19,233,12,-0.046414270281792,0.024983407229651,110425.35021234 40 | 39,-20.1,214,10,-0.060125853300095,0.02581845295243,113446.5987165 41 | 40,-20.7,219,10,-0.012591427206993,0.028257514157332,116464.33995342 42 | 41,-20.2,212,10,-0.0086673645973206,0.026001896804199,119487.04254436 43 | 42,-20.9,218,10,-0.022646985054016,0.030275962976739,122504.26678848 44 | 43,-20.1,204,10,-0.027892006874084,0.028721425947733,125523.69126534 45 | 44,-18.5,190,10,-0.019060639977455,0.028338955501793,128542.41256523 46 | 45,-20.222222222222,197,9,-0.062995014309883,0.028499234545976,131564.51712227 47 | 
46,-20.9,229,10,-0.04321956217289,0.027253655160312,134585.30974007 48 | 47,-19.222222222222,181,9,-0.04104344856739,0.029550002928823,137609.52091312 49 | 48,-17.777777777778,176,9,-0.042253398656845,0.029484741275664,140630.72361016 50 | 49,-18.333333333333,181,9,-0.044190877795219,0.030928210292011,143658.25809813 51 | 50,-20.5,219,10,-0.032392913341522,0.027665570050478,146677.15844607 52 | 51,-21,215,10,-0.025153607726097,0.030666341849719,149703.02046299 53 | 52,-21,198,9,-0.018558032989502,0.029278066476807,152725.13730097 54 | 53,-20.888888888889,196,9,-0.0093344089984894,0.029708028633497,155749.31631184 55 | 54,-20.2,211,10,0.0019440084695816,0.031671682446729,158769.69563389 56 | 55,-19.333333333333,179,9,0.018299667239189,0.026600243350491,161795.82533765 57 | 56,-21,232,11,0.0043197833299637,0.028242128532613,164816.60238647 58 | 57,-21,206,9,0.042310146689415,0.026893466249108,167839.94631767 59 | 58,-17.777777777778,169,9,0.04972175359726,0.027099281826988,170858.79717588 60 | 59,-17.444444444444,172,9,0.0208746291399,0.030874344742391,173883.27386665 61 | 60,-18.666666666667,173,9,0.029407291412354,0.029012460903265,176902.7581079 62 | 61,-20.666666666667,201,9,0.0078649291992188,0.028558519284707,179925.50136185 63 | 62,-20.222222222222,187,9,-0.0084951033592224,0.027179981692228,182941.50200248 64 | 63,-14,163,11,-0.031634729266167,0.026187739172485,185963.90782166 65 | 64,-19.111111111111,178,9,-0.037065606832504,0.03089484960027,188982.32703543 66 | 65,-20.222222222222,185,9,-0.045297539710999,0.031736915443093,192004.94367456 67 | 66,-20.5,182,8,-0.021077255725861,0.027121751084924,195026.34629083 68 | 67,-20,196,9,-0.01648589348793,0.026432656347286,198049.36023474 69 | 68,-19.555555555556,176,9,-0.0049410942792892,0.026054322204611,201068.14448476 70 | 69,-21,179,8,0.020968446135521,0.026585901013575,204092.56690407 71 | 70,-19.777777777778,179,9,0.040612026810646,0.028025028575212,207112.07806778 72 | 
71,-17.875,154,8,0.040276304125786,0.02809495675005,210137.16612267 73 | 72,-20.888888888889,194,9,0.016871741056442,0.028380218307022,213155.54822254 74 | 73,-19.5,160,8,0.036802646875381,0.025027464595158,216179.3938725 75 | 74,-16.75,150,8,0.045555381655693,0.02841033472959,219198.43669176 76 | 75,-16.875,150,8,0.054811473846436,0.029059150262736,222221.3499825 77 | 76,-18.75,164,8,0.053368012666702,0.026548398239538,225238.91632152 78 | 77,-21,234,11,0.045312218904495,0.027458579693921,228261.53490162 79 | 78,-21,214,10,0.049028945684433,0.024622122777626,231281.81836367 80 | 79,-17.625,159,8,0.054034627914429,0.025933243244886,234308.13716269 81 | 80,-20.875,187,8,0.066353542208672,0.02484296876844,237327.41376281 82 | 81,-21,194,9,0.069789034128189,0.029652824990451,240351.45815897 83 | 82,-18.375,159,8,0.07883975148201,0.026677033578046,243371.4479301 84 | 83,-21,181,8,0.081944783449173,0.024853519943543,246393.29405212 85 | 84,-19.625,169,8,0.087315746188164,0.029320955432951,249412.52044725 86 | 85,-21,173,8,0.077867820382118,0.026834661031142,252438.86284018 87 | 86,-17.75,159,8,0.080595542669296,0.025142893266864,255460.55970812 88 | 87,-19.375,163,8,0.070458922982216,0.026407181705348,258488.0415895 89 | 88,-19.25,163,8,0.0803895226717,0.023286892633187,261507.89537716 90 | 89,-19.25,159,8,0.078255670189857,0.023623998247087,264530.37380409 91 | 90,-20.375,174,8,0.076555946469307,0.025433748307638,267552.72768211 92 | 91,-19.875,161,8,0.076039079427719,0.024356592353433,270579.07843208 93 | 92,-21,190,9,0.073541507840157,0.021778888463974,273597.35354686 94 | 93,-19.75,166,8,0.060794572353363,0.025047093832865,276620.24881077 95 | 94,-21,180,8,0.051815708041191,0.023602273864672,279639.6330328 96 | 95,-21,192,9,0.050514903306961,0.025672010847833,282665.09332156 97 | 96,-21,182,8,0.046867358803749,0.023934892632999,285687.98782468 98 | 97,-17.444444444444,159,9,0.058180457472801,0.022700749365613,288715.86644173 99 | 
98,-21,205,9,0.056869177818298,0.023170761575224,291737.61343169 100 | 99,-16.125,135,8,0.053170693874359,0.020797000256367,294764.53351068 101 | -------------------------------------------------------------------------------- /plots/Pong2Player0_history_B.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-15.636363636364,347,22,-0.035547246217728,0.016063386268914,0 3 | 2,-20.2,508,25,-0.093115000188351,0.020183824717999,2138.1417090893 4 | 3,-10.111111111111,185,18,-0.13151980644464,0.016994844824076,5032.016119957 5 | 4,-10.527777777778,388,36,-0.15854629296064,0.019270747810602,7944.8819160461 6 | 5,-9.5428571428571,353,35,-0.17677662348747,0.024828659415245,10893.190360785 7 | 6,-9.8857142857143,355,35,-0.19931477379799,0.01351283454895,13839.044056892 8 | 7,-16.095238095238,353,21,-0.18255959832668,0.014386477082968,16811.021166086 9 | 8,-16.3,346,20,-0.19562866699696,0.022980797350407,19795.227915049 10 | 9,-12.9375,415,32,-0.35770421242714,0.027820185661316,22806.841446161 11 | 10,-15.344827586207,448,29,-0.42874370098114,0.043996010184288,25822.181778193 12 | 11,-20.75,506,24,-0.50003332948685,0.040714545398951,28847.795817852 13 | 12,-15.125,243,16,-0.46861186861992,0.036812288284302,31865.068264961 14 | 13,-13.366666666667,403,30,-0.46336123096943,0.035849552631378,34888.49136591 15 | 14,-20.5,423,20,-0.46318495810032,0.038222200334072,37907.987119913 16 | 15,-6.6190476190476,140,21,-0.45802145218849,0.035477287948132,40934.394598007 17 | 16,-16.105263157895,319,19,-0.45680632591248,0.053284872546792,43948.930922031 18 | 17,-15.5,284,18,-0.48365955197811,0.042265393778682,46971.943500996 19 | 18,-8.9473684210526,170,19,-0.53705098724365,0.044493979424238,49988.333903074 20 | 19,-1.8,46,25,-0.56766551327705,0.040447373002768,53014.636921883 21 | 20,-4.4583333333333,113,24,-0.49396479249001,0.034765257060528,56030.686516047 22 | 
21,-6.1111111111111,113,18,-0.50199485063553,0.039501217216253,59056.352437973 23 | 22,-6.21875,202,32,-0.487187063694,0.039658673390746,62074.302101851 24 | 23,-18.035714285714,512,28,-0.47721617519855,0.038565734222531,65102.738009691 25 | 24,-4.6,92,20,-0.52219408416748,0.038196368992329,68122.853546858 26 | 25,-19.961538461538,520,26,-0.44699176561832,0.038303328856826,71147.998114824 27 | 26,-15.941176470588,272,17,-0.41673709726334,0.037333195909858,74168.781708956 28 | 27,-5.1428571428571,111,21,-0.3699722969532,0.038875097535551,77202.353144884 29 | 28,-13.571428571429,193,14,-0.34000007510185,0.040263681471348,80223.19860363 30 | 29,-20.1875,324,16,-0.31610184490681,0.039434242356569,83246.392048597 31 | 30,-20.076923076923,262,13,-0.30181977343559,0.029510153640062,86263.41059041 32 | 31,-16.769230769231,221,13,-0.26731790697575,0.032566703515127,89288.379914522 33 | 32,-18.833333333333,234,12,-0.22635623526573,0.033302143184468,92305.957981825 34 | 33,-15.5,172,10,-0.21410574710369,0.032989202816039,95331.062992573 35 | 34,-14.636363636364,176,11,-0.23098263418674,0.029800812546164,98348.517512321 36 | 35,-19,271,14,-0.20933587479591,0.045391301142052,101371.28614831 37 | 36,-15.769230769231,216,13,-0.1732489168644,0.040104366856627,104388.2614243 38 | 37,-15.727272727273,176,11,-0.14663448023796,0.03570713039767,107410.07730746 39 | 38,-17.916666666667,218,12,-0.15233700692654,0.036363918918738,110425.35021234 40 | 39,-15,158,10,-0.10284967255592,0.039888166427612,113446.5987165 41 | 40,-15,156,10,-0.110485252738,0.036288297230378,116464.33995342 42 | 41,-14.6,149,10,-0.08808405315876,0.033201564714778,119487.04254436 43 | 42,-13.9,152,10,-0.07288631439209,0.038115674535511,122504.26678848 44 | 43,-13.8,143,10,-0.055069948673248,0.032398316109553,125523.69126534 45 | 44,-17.4,181,10,-0.033978573799133,0.033925841143355,128542.41256523 46 | 45,-17.888888888889,175,9,-0.011699901461601,0.033647665683646,131564.51712227 47 | 
46,-10.9,120,10,-0.039500725746155,0.033995908844285,134585.30974007 48 | 47,-17.222222222222,174,9,-0.012171776294708,0.036561916653533,137609.52091312 49 | 48,-19.555555555556,186,9,-0.014062858104706,0.036956321249716,140630.72361016 50 | 49,-17.333333333333,168,9,-0.011009687542915,0.030932576787192,143658.25809813 51 | 50,-13.5,139,10,-0.029655468344688,0.03278627650626,146677.15844607 52 | 51,-12.4,126,10,-0.021844506025314,0.034187985648401,149703.02046299 53 | 52,-14.222222222222,141,9,-0.021329047083855,0.032817083856557,152725.13730097 54 | 53,-15.444444444444,146,9,-0.010863201618195,0.031814879472833,155749.31631184 55 | 54,-13.7,143,10,-0.0052515711784363,0.032435634775247,158769.69563389 56 | 55,-16.888888888889,154,9,-0.023274070858955,0.030390426066704,161795.82533765 57 | 56,-12.363636363636,140,11,-0.013775194406509,0.029583558951505,164816.60238647 58 | 57,-14.111111111111,140,9,-0.010566335558891,0.03017777786497,167839.94631767 59 | 58,-19,182,9,-0.040504007816315,0.03008227618644,170858.79717588 60 | 59,-18.333333333333,178,9,-0.038168950676918,0.029901974986307,173883.27386665 61 | 60,-18.111111111111,166,9,-0.035144717693329,0.030881551608909,176902.7581079 62 | 61,-13.222222222222,134,9,-0.044505524277687,0.030492878863239,179925.50136185 63 | 62,-15.666666666667,144,9,-0.031238757967949,0.031001482838998,182941.50200248 64 | 63,-20.363636363636,234,11,-0.0081189591884613,0.031862079079729,185963.90782166 65 | 64,-17.555555555556,162,9,0.010118610024452,0.03147687318176,188982.32703543 66 | 65,-16.777777777778,156,9,0.0095146139860153,0.03700613715034,192004.94367456 67 | 66,-15.25,139,8,0.034772622585297,0.027561287589371,195026.34629083 68 | 67,-14.555555555556,144,9,0.047611666798592,0.032678590173833,198049.36023474 69 | 68,-15.777777777778,143,9,0.069406152129173,0.032473844949622,201068.14448476 70 | 69,-16.125,140,8,0.06597646343708,0.031676553032361,204092.56690407 71 | 
70,-15.888888888889,146,9,0.076632283806801,0.029510184790008,207112.07806778 72 | 71,-18.75,163,8,0.076743006110191,0.02990186560154,210137.16612267 73 | 72,-13.444444444444,124,9,0.066774213075638,0.028356832669582,213155.54822254 74 | 73,-19,154,8,0.078227077245712,0.027551507115364,216179.3938725 75 | 74,-20,170,8,0.08356684923172,0.03249790078029,219198.43669176 76 | 75,-19.5,172,8,0.071725313782692,0.02946904342249,222221.3499825 77 | 76,-18.875,165,8,0.069635803937912,0.02919847426936,225238.91632152 78 | 77,-10.545454545455,116,11,0.058670830130577,0.025365659317002,228261.53490162 79 | 78,-12.1,121,10,0.060952044487,0.029573503917083,231281.81836367 80 | 79,-17.5,149,8,0.039388704061508,0.030934552330524,234308.13716269 81 | 80,-15.75,139,8,0.032999743103981,0.027728069182485,237327.41376281 82 | 81,-12.111111111111,117,9,0.02932597899437,0.02745916046761,240351.45815897 83 | 82,-18.5,154,8,0.043820874333382,0.028538968775421,243371.4479301 84 | 83,-15.5,136,8,0.026993710756302,0.027210989664309,246393.29405212 85 | 84,-17.625,148,8,0.035026452422142,0.029123003091663,249412.52044725 86 | 85,-16.75,138,8,0.058367486596107,0.029693385642022,252438.86284018 87 | 86,-18.875,168,8,0.073346694946289,0.028949410717934,255460.55970812 88 | 87,-17.625,144,8,0.070859031200409,0.028716616470367,258488.0415895 89 | 88,-18.125,157,8,0.064134326934814,0.030109628035687,261507.89537716 90 | 89,-18.75,151,8,0.076011339068413,0.031651783834212,264530.37380409 91 | 90,-15.25,134,8,0.085877468347549,0.030369257452898,267552.72768211 92 | 91,-19,157,8,0.091362419605255,0.026273697428405,270579.07843208 93 | 92,-13.555555555556,122,9,0.10069872260094,0.026822981573641,273597.35354686 94 | 93,-16.75,139,8,0.094791305541992,0.027533179562539,276620.24881077 95 | 94,-14.875,124,8,0.11578515303135,0.026371674910188,279639.6330328 96 | 95,-13.555555555556,122,9,0.10465205264091,0.025525284297764,282665.09332156 97 | 96,-13.5,114,8,0.11002721095085,0.023612505104393,285687.98782468 
98 | 97,-16.444444444444,151,9,0.11250710332394,0.028119032643735,288715.86644173 99 | 98,-10.333333333333,100,9,0.12482603573799,0.028869435520843,291737.61343169 100 | 99,-20.625,178,8,0.12047631049156,0.030346720989794,294764.53351068 101 | -------------------------------------------------------------------------------- /plots/Pong2Player0_meanq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_meanq.png -------------------------------------------------------------------------------- /plots/Pong2Player0_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player0_tderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_tderror.png -------------------------------------------------------------------------------- /plots/Pong2Player0_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player0_time.png -------------------------------------------------------------------------------- /plots/Pong2PlayerVS_average_reward.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_average_reward.png -------------------------------------------------------------------------------- /plots/Pong2PlayerVS_episode_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2PlayerVS_history_A.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,19.894736842105,1652,76,0.036629230499268,0.034258731238544,0 3 | 2,-10.864864864865,1032,37,0.088388940036297,0.036268266946077,2140.1843442917 4 | 3,6.75,566,20,0.10980970552564,0.035165142327547,5024.0690410137 5 | 4,17.782608695652,544,23,0.14155595132709,0.035153854876757,7954.2796509266 6 | 5,-8.2571428571429,982,35,0.18404190149903,0.034203920930624,10895.232643127 7 | 6,1.3636363636364,735,22,0.23041268283129,0.036833466291428,13843.955490112 8 | 7,-4.0555555555556,607,18,0.24128054684401,0.040439512044191,16800.791962147 9 | 8,-17.246376811594,1712,69,0.27102411770821,0.040627625931054,19781.40379405 10 | 9,-8.2608695652174,1735,69,0.37685767302662,0.047047990996391,22781.630583048 11 | 10,20.964705882353,1804,85,0.40086788585782,0.061366469353437,25832.945611 12 | 11,18.073529411765,1622,68,0.58332307624817,0.069838082119823,28871.55139184 13 | 12,-8.2558139534884,1330,43,0.60145196096599,0.066045082810801,31952.171814919 14 | 13,-6.5526315789474,1282,38,0.68109701417387,0.074076765326783,34966.254708767 15 | 14,-6,1087,32,0.7062286438942,0.081175927717239,37979.379287958 16 | 15,-7.1935483870968,1058,31,0.73730385822058,0.080288166455925,41006.026421785 17 | 
16,2.8636363636364,843,22,0.76484009151161,0.06838713674061,44023.744074583 18 | 17,-4.25,841,24,0.82907662519813,0.070006534771994,47071.221020699 19 | 18,-1.6315789473684,732,19,0.82211218604445,0.079391503483057,50115.644058466 20 | 19,-7,762,22,1.0358029997051,0.083106067851186,53143.724182606 21 | 20,-7.4117647058824,599,17,1.0717535401285,0.068865682188421,56224.953065634 22 | 21,-4.0526315789474,681,19,1.0258434101641,0.07186470163241,59252.510054588 23 | 22,2.2631578947368,707,19,1.0864659250975,0.059363974899054,62302.120079756 24 | 23,3.3571428571429,526,14,1.0833346217573,0.063327269699425,65340.640899897 25 | 24,-0.47368421052632,750,19,1.0997896017432,0.062145422551781,68390.421947956 26 | 25,-3.2941176470588,640,17,1.102532440275,0.06356831882894,71446.502653837 27 | 26,-3,656,18,1.0374306339025,0.054802458614111,74549.215828657 28 | 27,-4,514,13,0.96129315176606,0.054592645399272,77634.196890831 29 | 28,-4,524,14,0.96601669877768,0.053654642223381,80685.836214781 30 | 29,-2.1764705882353,617,17,0.9382788464427,0.053507280364633,83716.217218876 31 | 30,-1.625,564,16,0.90140304681659,0.054804528713226,86816.62901473 32 | 31,-2.7333333333333,590,15,0.89678108328581,0.055654351443052,89896.952369928 33 | 32,-4.6666666666667,539,15,0.87170422816277,0.052837499111891,92944.94702816 34 | 33,0.375,593,16,0.86125646364689,0.054151404440403,96033.474353313 35 | 34,-5.4285714285714,517,14,0.80565326881409,0.048675433501601,99126.870426178 36 | 35,-8.3333333333333,418,12,0.83305224263668,0.048897989697754,102218.8840003 37 | 36,-7.2666666666667,544,15,0.81939549726248,0.062981894731522,105332.10956836 38 | 37,-5.5333333333333,521,15,0.78063530129194,0.054293885916471,108390.310781 39 | 38,-8.2857142857143,480,14,0.76713334679604,0.049812828674912,111577.00082707 40 | 39,-8.2,525,15,0.74416504707932,0.047728421837091,114681.56327724 41 | 40,-5.6666666666667,558,15,0.70014776280522,0.049224613063037,117785.27115202 42 | 
41,-4.1818181818182,400,11,0.66092579168081,0.047589437417686,120884.00687814 43 | 42,-6.0833333333333,458,12,0.61858874151111,0.046044506199658,123987.18336725 44 | 43,-1.0666666666667,569,15,0.61212630918622,0.042183957137167,127176.64506316 45 | 44,-3,568,16,0.61761562865973,0.048996827743948,130457.61719894 46 | 45,-1.3333333333333,447,12,0.62245470565557,0.043741934835911,133590.15817618 47 | 46,-6.3571428571429,507,14,0.62223418441415,0.049592643141747,136745.98338032 48 | 47,-0.78571428571429,538,14,0.60995001575351,0.040588559448719,140288.5203855 49 | 48,-0.75,438,12,0.6117885850668,0.042088281303644,143582.21646833 50 | 49,-9,569,17,0.58831976529956,0.051058675073087,146885.37541223 51 | 50,-3.2857142857143,513,14,0.58009276488423,0.050145434975624,150514.73280025 52 | 51,0.85714285714286,516,14,0.56708448454738,0.05352240806818,153905.42626715 53 | 52,-0.66666666666667,355,9,0.603058814466,0.054049070090055,156998.08905911 54 | 53,-3.9230769230769,483,13,0.6199501991272,0.051908938907087,160223.2752831 55 | 54,-7.3333333333333,431,12,0.60428953760862,0.04895763245225,163514.40027618 56 | 55,0.25,476,12,0.60181038728356,0.050085354879498,166998.89249229 57 | 56,-5.3846153846154,458,13,0.62117442503572,0.046872297987342,170359.70756102 58 | 57,-3.2,359,10,0.60641793078184,0.052600849196315,173823.41051316 59 | 58,-4.8571428571429,492,14,0.62653725367784,0.057909801356494,177196.93280435 60 | 59,-2.0833333333333,411,12,0.62770763164759,0.044757399730384,180707.99882913 61 | 60,-6.4545454545455,418,11,0.62793077921867,0.053041671976447,184368.79880023 62 | 61,-6.6923076923077,478,13,0.62323064267635,0.051051275312901,188232.23590922 63 | 62,-7.25,437,12,0.64113255578279,0.04879417716153,191757.69521713 64 | 63,-5.3,369,10,0.62657707825303,0.048134090598673,195594.75462699 65 | 64,-5.6153846153846,468,13,0.64060921034217,0.045986624030396,199862.88124394 66 | 65,-4.3636363636364,424,11,0.63786323845387,0.047828262917697,203532.96440005 67 | 
66,-6.2,376,10,0.64415379482508,0.04426491074264,207733.68452883 68 | 67,-8.6153846153846,444,13,0.6445699121356,0.054958277672529,211655.73910689 69 | 68,-0.61538461538462,488,13,0.64655533200502,0.038586034059525,215466.90428996 70 | 69,-4,486,13,0.64888703554869,0.043914513036609,219559.02877498 71 | 70,-0.33333333333333,447,12,0.64624559032917,0.047641892939806,223804.84991622 72 | 71,-6.5384615384615,472,13,0.63566690003872,0.049302317127585,227838.63679528 73 | 72,-4.6363636363636,417,11,0.63711099690199,0.051252304315567,232027.32367206 74 | 73,-8.2307692307692,443,13,0.63391436266899,0.048665264971554,236188.94491291 75 | 74,-3.3333333333333,450,12,0.62945883643627,0.048747919064015,241381.24751687 76 | 75,-3.5384615384615,478,13,0.62911532127857,0.047036009728909,245670.82948804 77 | 76,-5.0833333333333,442,12,0.61040000587702,0.04652617444098,250570.50234628 78 | 77,-1.3636363636364,452,11,0.57874590176344,0.045145205818117,258795.74901628 79 | 78,2.9090909090909,444,11,0.56367688274384,0.046146706014872,265376.65494132 80 | 79,1,433,11,0.58433644586802,0.044890030667186,273031.80326462 81 | 80,0.4,407,10,0.56603986006975,0.051472402080894,279453.38454461 82 | 81,-1.9,396,10,0.56983534914255,0.042509867101908,287339.86959958 83 | 82,-6.5,427,12,0.57795796376467,0.046896411240101,294809.03612781 84 | 83,-6.0769230769231,445,13,0.59138572341204,0.043647857218981,301846.70236588 85 | 84,-2,446,12,0.60097670978308,0.049626976758242,308882.32642794 86 | 85,-1.2,396,10,0.61287046802044,0.043968385025859,315383.48060513 87 | 86,-5.2727272727273,397,11,0.61317317944765,0.046134451977909,321902.52821803 88 | 87,-2,449,11,0.62379645317793,0.049722394168377,328601.83020878 89 | 88,-6.3333333333333,431,12,0.60928024560213,0.048750120550394,334467.63142991 90 | 89,-1.6666666666667,452,12,0.61758359968662,0.044201732039452,339582.35990906 91 | 90,-0.090909090909091,414,11,0.63299699753523,0.045960027575493,345550.17142677 92 | 
91,-1.2727272727273,424,11,0.62909919255972,0.044356597349048,351840.05597472 93 | 92,-2.3333333333333,447,12,0.6411742810607,0.046878527492285,357930.80812883 94 | 93,-1.2,403,10,0.63069819927216,0.043852157630026,364560.41527987 95 | 94,-1.9,423,10,0.64790958750248,0.04368916927278,369735.55567384 96 | 95,-0.33333333333333,438,12,0.65804475861788,0.0468343205899,373734.55882072 97 | 96,2.5,374,10,0.65388202512264,0.041903344072402,377166.37988877 98 | 97,0.75,444,12,0.66520742946863,0.044931938365102,381859.93837285 99 | 98,-3.0909090909091,422,11,0.67006937599182,0.046003070726991,389255.18656874 100 | 99,-4.3636363636364,393,11,0.64541435182095,0.048469615295529,395686.37279058 101 | -------------------------------------------------------------------------------- /plots/Pong2PlayerVS_history_B.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-19.894736842105,1652,76,0.030753178421408,0.020417045775801,0 3 | 2,10.864864864865,1032,37,0.055571975842118,0.020495216928422,2140.1843442917 4 | 3,-6.75,566,20,0.078522597447038,0.018338140755892,5024.0690410137 5 | 4,-17.782608695652,544,23,0.11758578260243,0.019854311227798,7954.2796509266 6 | 5,8.2571428571429,982,35,0.14490511378646,0.019651415139437,10895.232643127 7 | 6,-1.3636363636364,735,22,0.156849973768,0.018027543753386,13843.955490112 8 | 7,4.0555555555556,607,18,0.15613037407398,0.03612357429415,16800.791962147 9 | 8,17.246376811594,1712,69,0.16600373826176,0.040893536631949,19781.40379405 10 | 9,8.2608695652174,1735,69,0.16087171534449,0.053060119055212,22781.630583048 11 | 10,-20.964705882353,1804,85,0.21962278755754,0.079659719966352,25832.945611 12 | 11,-18.073529411765,1622,68,0.32419759770483,0.085290238887072,28871.55139184 13 | 12,8.2558139534884,1330,43,0.38745628522336,0.087929436858743,31952.171814919 14 | 
13,6.5526315789474,1282,38,0.45150299651921,0.09053659071913,34966.254708767 15 | 14,6,1087,32,0.49048775991797,0.080990801326931,37979.379287958 16 | 15,7.1935483870968,1058,31,0.54364813241363,0.075845399744809,41006.026421785 17 | 16,-2.8636363636364,843,22,0.6258010738343,0.073953761775978,44023.744074583 18 | 17,4.25,841,24,0.71101898576319,0.07438159080036,47071.221020699 19 | 18,1.6315789473684,732,19,0.78495635423064,0.065564899936318,50115.644058466 20 | 19,7,762,22,0.84685340037942,0.066515339031816,53143.724182606 21 | 20,7.4117647058824,599,17,0.89461297640204,0.06911209545657,56224.953065634 22 | 21,4.0526315789474,681,19,1.0150132971406,0.071963771987706,59252.510054588 23 | 22,-2.2631578947368,707,19,0.97882900413871,0.061374731684104,62302.120079756 24 | 23,-3.3571428571429,526,14,1.0373114794493,0.055309426490217,65340.640899897 25 | 24,0.47368421052632,750,19,1.0033243787885,0.056636636704206,68390.421947956 26 | 25,3.2941176470588,640,17,0.92811875632405,0.054018623730168,71446.502653837 27 | 26,3,656,18,0.93651082640886,0.055023560766131,74549.215828657 28 | 27,4,514,13,0.94254752320051,0.058531855151057,77634.196890831 29 | 28,4,524,14,0.89031834989786,0.056271550633013,80685.836214781 30 | 29,2.1764705882353,617,17,0.86999075606465,0.051859064750373,83716.217218876 31 | 30,1.625,564,16,0.82614176213741,0.057562429867685,86816.62901473 32 | 31,2.7333333333333,590,15,0.82187315291166,0.058495092704892,89896.952369928 33 | 32,4.6666666666667,539,15,0.789477578789,0.0587079064399,92944.94702816 34 | 33,-0.375,593,16,0.79569136279821,0.05894677489996,96033.474353313 35 | 34,5.4285714285714,517,14,0.78301355272532,0.054899339735508,99126.870426178 36 | 35,8.3333333333333,418,12,0.79044949617982,0.056301230795681,102218.8840003 37 | 36,7.2666666666667,544,15,0.80112966805696,0.055791450798512,105332.10956836 38 | 37,5.5333333333333,521,15,0.78027736347914,0.050847516030073,108390.310781 39 | 
38,8.2857142857143,480,14,0.8137402973175,0.048504514113069,111577.00082707 40 | 39,8.2,525,15,0.79286936756968,0.052606489300728,114681.56327724 41 | 40,5.6666666666667,558,15,0.78303875213861,0.046839555993676,117785.27115202 42 | 41,4.1818181818182,400,11,0.81350393545628,0.055344005376101,120884.00687814 43 | 42,6.0833333333333,458,12,0.82606654143333,0.048681836031377,123987.18336725 44 | 43,1.0666666666667,569,15,0.79435331770778,0.057932917118073,127176.64506316 45 | 44,3,568,16,0.77271989172697,0.051193475157022,130457.61719894 46 | 45,1.3333333333333,447,12,0.77484880280495,0.045013084542006,133590.15817618 47 | 46,6.3571428571429,507,14,0.78015144973993,0.047682569071651,136745.98338032 48 | 47,0.78571428571429,538,14,0.77643180459738,0.049572788715363,140288.5203855 49 | 48,0.75,438,12,0.79014997059107,0.044922678057104,143582.21646833 50 | 49,9,569,17,0.80040570390224,0.049333242207766,146885.37541223 51 | 50,3.2857142857143,513,14,0.77136175608635,0.047377686411142,150514.73280025 52 | 51,-0.85714285714286,516,14,0.74489397323132,0.044408887729049,153905.42626715 53 | 52,0.66666666666667,355,9,0.75983708006144,0.04565731857717,156998.08905911 54 | 53,3.9230769230769,483,13,0.74724908590317,0.049826633743942,160223.2752831 55 | 54,7.3333333333333,431,12,0.74608357340097,0.044874432623386,163514.40027618 56 | 55,-0.25,476,12,0.72551223999262,0.050711758375168,166998.89249229 57 | 56,5.3846153846154,458,13,0.75233506691456,0.046698903981596,170359.70756102 58 | 57,3.2,359,10,0.73104023519158,0.04379932167381,173823.41051316 59 | 58,4.8571428571429,492,14,0.71849496692419,0.042877251973376,177196.93280435 60 | 59,2.0833333333333,411,12,0.72559829336405,0.043330619191751,180707.99882913 61 | 60,6.4545454545455,418,11,0.70747312837839,0.043838432177901,184368.79880023 62 | 61,6.6923076923077,478,13,0.70651694011688,0.046224248636514,188232.23590922 63 | 62,7.25,437,12,0.68852741104364,0.049908312603831,191757.69521713 64 | 
63,5.3,369,10,0.66830239707232,0.042447510216385,195594.75462699 65 | 64,5.6153846153846,468,13,0.6702712700367,0.040575239777565,199862.88124394 66 | 65,4.3636363636364,424,11,0.66358332365751,0.045900904551148,203532.96440005 67 | 66,6.2,376,10,0.67817037689686,0.040241999074817,207733.68452883 68 | 67,8.6153846153846,444,13,0.69724605107307,0.047058273538947,211655.73910689 69 | 68,0.61538461538462,488,13,0.6848148176074,0.042222519740462,215466.90428996 70 | 69,4,486,13,0.6842090305686,0.046335531935096,219559.02877498 71 | 70,0.33333333333333,447,12,0.69121128064394,0.04058175695315,223804.84991622 72 | 71,6.5384615384615,472,13,0.69477743583918,0.045939372435212,227838.63679528 73 | 72,4.6363636363636,417,11,0.67862904149294,0.040864990450442,232027.32367206 74 | 73,8.2307692307692,443,13,0.67067655992508,0.044045647501945,236188.94491291 75 | 74,3.3333333333333,450,12,0.67136865311861,0.043262280382216,241381.24751687 76 | 75,3.5384615384615,478,13,0.65915973466635,0.044760326504707,245670.82948804 77 | 76,5.0833333333333,442,12,0.67250486266613,0.046776309124194,250570.50234628 78 | 77,1.3636363636364,452,11,0.66918101072311,0.049758086398244,258795.74901628 79 | 78,-2.9090909090909,444,11,0.68012343931198,0.042351133339107,265376.65494132 80 | 79,-1,433,11,0.70117538625002,0.044687234297395,273031.80326462 81 | 80,-0.4,407,10,0.71243719160557,0.043381349245086,279453.38454461 82 | 81,1.9,396,10,0.7182938054204,0.042881715647876,287339.86959958 83 | 82,6.5,427,12,0.7005225251317,0.045902971506119,294809.03612781 84 | 83,6.0769230769231,445,13,0.69715527999401,0.046531330704689,301846.70236588 85 | 84,2,446,12,0.69628106099367,0.046715326815844,308882.32642794 86 | 85,1.2,396,10,0.69748233801126,0.040650709867477,315383.48060513 87 | 86,5.2727272727273,397,11,0.70037325745821,0.040244948428124,321902.52821803 88 | 87,2,449,11,0.70343012046814,0.045980382755399,328601.83020878 89 | 88,6.3333333333333,431,12,0.69383743393421,0.041914516195655,334467.63142991 
90 | 89,1.6666666666667,452,12,0.69825469052792,0.039878826737404,339582.35990906 91 | 90,0.090909090909091,414,11,0.68960511600971,0.040170164361596,345550.17142677 92 | 91,1.2727272727273,424,11,0.67836646288633,0.042912368401885,351840.05597472 93 | 92,2.3333333333333,447,12,0.68362768256664,0.043504333540797,357930.80812883 94 | 93,1.2,403,10,0.67348195332289,0.04388574296236,364560.41527987 95 | 94,1.9,423,10,0.6916718544364,0.043883678305894,369735.55567384 96 | 95,0.33333333333333,438,12,0.70352491497993,0.04276246599108,373734.55882072 97 | 96,-2.5,374,10,0.72184679532051,0.041803027048707,377166.37988877 98 | 97,-0.75,444,12,0.7270547606945,0.045335452474654,381859.93837285 99 | 98,3.0909090909091,422,11,0.72387615138292,0.043761293586344,389255.18656874 100 | 99,4.3636363636364,393,11,0.72118641269207,0.042639298856258,395686.37279058 101 | -------------------------------------------------------------------------------- /plots/Pong2PlayerVS_meanq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_meanq.png -------------------------------------------------------------------------------- /plots/Pong2PlayerVS_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2PlayerVS_tderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_tderror.png 
-------------------------------------------------------------------------------- /plots/Pong2PlayerVS_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2PlayerVS_time.png -------------------------------------------------------------------------------- /plots/Pong2Player_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_average_reward.png -------------------------------------------------------------------------------- /plots/Pong2Player_episode_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_episode_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player_history_A.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-26.458333333333,654,24,-0.095357276380062,0.043428983919322,0 3 | 2,-24.260869565217,571,23,-0.25713111811876,0.049565929591656,2219.7107670307 4 | 3,-30.823529411765,535,17,-0.37960375964642,0.061796604514122,5234.8550970554 5 | 4,-29.375,719,24,-0.45105549693108,0.053154966473579,8274.6327157021 6 | 5,-27.578947368421,542,19,-0.51505306136608,0.072533971011639,11343.717128754 7 | 6,-27.289473684211,1038,38,-0.61315587735176,0.061384726524353,14424.585474968 8 | 7,-28.514285714286,1008,35,-0.8955296381712,0.055836423873901,17527.729490757 9 | 8,-22.910714285714,1294,56,-0.98565722322464,0.049536979198456,20646.920996666 10 | 
9,-26.333333333333,573,21,-1.02711254704,0.050674617409706,23793.418364763 11 | 10,-21.958333333333,537,24,-1.0271174045801,0.049659738302231,26948.615488768 12 | 11,-28.705882352941,508,17,-1.0852300107479,0.045676375865936,30113.221090794 13 | 12,-31.333333333333,98,3,-1.0055357445478,0.045826857686043,33269.140145779 14 | 13,-30.9375,505,16,-0.96134313106537,0.044203288912773,36434.461924553 15 | 14,-28.705882352941,507,17,-0.91738075852394,0.046511339187622,39919.898360729 16 | 15,-28.705882352941,507,17,-0.87516816967726,0.045471202611923,43082.808995724 17 | 16,-36.818181818182,419,11,-0.85296364432573,0.050174029350281,46241.313314676 18 | 17,-37,394,10,-0.81926957434416,0.042408460795879,49401.823321819 19 | 18,-36.454545454545,417,11,-0.80404985666275,0.052845206677914,52556.488364697 20 | 19,-36.181818181818,403,11,-0.72174312716722,0.045161524236202,55721.547684669 21 | 20,-34.727272727273,391,11,-0.75188615107536,0.045518432587385,58879.465076685 22 | 21,-36.666666666667,366,9,-0.71129180133343,0.045481154322624,62042.817330837 23 | 22,-30.636363636364,359,11,-0.69675636130571,0.042603764116764,65203.508159876 24 | 23,-34,365,10,-0.66037705516815,0.045244661659002,68371.994807005 25 | 24,-37.888888888889,351,9,-0.6214785618186,0.041710187494755,71532.411481619 26 | 25,-34.555555555556,334,9,-0.57756735938787,0.038945589452982,74696.672069788 27 | 26,-35.666666666667,341,9,-0.54762472438812,0.041154150933027,77855.706784964 28 | 27,-33.777777777778,331,9,-0.52157988876104,0.040725954800844,81024.021785021 29 | 28,-37.125,308,8,-0.50267528045177,0.039348959892988,84182.163150072 30 | 29,-32.818181818182,388,11,-0.5208072988987,0.04023371258378,87344.461227179 31 | 30,-34,317,9,-0.50427635723352,0.039969101399183,90501.760588169 32 | 31,-35.111111111111,334,9,-0.46341560208797,0.039467708855867,93669.918260336 33 | 32,-33.7,345,10,-0.44287770795822,0.040555094107985,96827.402169228 34 | 33,-37.875,320,8,-0.39677744108438,0.040005072303116,99991.178019285 
35 | 34,-34.75,300,8,-0.39570564180613,0.035729702591896,103149.88645959 36 | 35,-35.142857142857,273,7,-0.409107203722,0.039551196306944,106317.52060747 37 | 36,-35,316,9,-0.40082246625423,0.03927739597857,109477.61490846 38 | 37,-31.444444444444,304,9,-0.3964589330554,0.038392450235784,112644.04946756 39 | 38,-35,325,9,-0.35121801757812,0.038004578579217,115803.35801244 40 | 39,-34,308,9,-0.32286202985048,0.037074327471666,118968.0033195 41 | 40,-36.625,309,8,-0.32320038503408,0.037060650097206,122125.33657956 42 | 41,-38.142857142857,292,7,-0.32281405234337,0.036791522218846,125288.71097755 43 | 42,-34.25,295,8,-0.321888240695,0.038397595276125,128449.8854835 44 | 43,-36.428571428571,284,7,-0.33277552944422,0.035203957707388,131617.50346541 45 | 44,-36.875,297,8,-0.35373691809177,0.036649934265763,134780.73201251 46 | 45,-34.875,289,8,-0.36005285412073,0.039131956782192,137949.28987861 47 | 46,-33.75,277,8,-0.35520882689953,0.035049023443833,141109.40660763 48 | 47,-35.25,289,8,-0.33053499352932,0.036487454757094,144275.81889558 49 | 48,-35.25,307,8,-0.33954701691866,0.037912156413309,147435.31064677 50 | 49,-39.571428571429,282,7,-0.32449682414532,0.039220210487023,150603.13879275 51 | 50,-36,273,7,-0.31643177467585,0.037731511502527,153765.27083468 52 | 51,-33.714285714286,267,7,-0.30931244218349,0.038934745969018,156934.99299765 53 | 52,-36.428571428571,279,7,-0.28822984421253,0.040141627021134,160095.28643036 54 | 53,-34.875,289,8,-0.29541040021181,0.035895705933799,163261.62074041 55 | 54,-35.75,288,8,-0.27794497185946,0.040368533103727,166420.72678161 56 | 55,-36.142857142857,276,7,-0.27815217393637,0.037139108544681,169588.44667172 57 | 56,-36.714285714286,286,7,-0.25671535021067,0.039222533322871,172748.50242591 58 | 57,-37.285714285714,285,7,-0.25984488493204,0.037240513943136,175910.684515 59 | 58,-35.428571428571,270,7,-0.26506305646896,0.037857254445553,179070.61611986 60 | 59,-36.285714285714,279,7,-0.26481886297464,0.038876299795229,182233.66440177 
61 | 60,-34.25,298,8,-0.27339893108606,0.03648377366364,185392.0522368 62 | 61,-38,279,7,-0.26885661417246,0.036164035882335,188557.70815563 63 | 62,-36.142857142857,286,7,-0.27515106755495,0.035154181305785,191718.52969551 64 | 63,-33.875,295,8,-0.29238852584362,0.038873594135512,194886.45836949 65 | 64,-31,261,8,-0.27444543755054,0.03615041842591,198048.08661747 66 | 65,-37.857142857143,280,7,-0.27285833537579,0.037310198902152,201217.75610328 67 | 66,-37.714285714286,284,7,-0.27229812979698,0.037948209465947,204377.43602824 68 | 67,-36.714285714286,273,7,-0.26851659548283,0.037649981425377,207544.57766247 69 | 68,-36.428571428571,287,7,-0.27455387759209,0.036888742106967,210706.8551352 70 | 69,-36,273,7,-0.30327244818211,0.036359902586089,213871.46873212 71 | 70,-35.5,244,6,-0.28581845986843,0.038536409900407,217029.25653219 72 | 71,-36.428571428571,266,7,-0.28054606413841,0.03837350646453,220194.61400127 73 | 72,-35,275,7,-0.30645019698143,0.037592195025645,223354.55645609 74 | 73,-32.5,275,8,-0.30246568739414,0.03762510857312,226520.75250101 75 | 74,-36.285714285714,265,7,-0.29572137546539,0.036622938405257,229680.09711981 76 | 75,-32.125,291,8,-0.27788608777523,0.043533025629818,232846.38811898 77 | 76,-33.571428571429,266,7,-0.25835147070885,0.03797403547354,236005.74047184 78 | 77,-35.666666666667,239,6,-0.25210114300251,0.037980964697897,239172.54759908 79 | 78,-39.333333333333,269,6,-0.24693970394135,0.039258848479018,242333.03757191 80 | 79,-32.375,276,8,-0.24059923732281,0.040176058832556,245498.53613424 81 | 80,-36.428571428571,270,7,-0.24444462656975,0.040253090277314,248660.04212308 82 | 81,-35.571428571429,267,7,-0.25956621682644,0.038885270408355,251829.45792484 83 | 82,-39,49,1,-0.25390129482746,0.038192279633135,254992.96653295 84 | 83,-36.8,201,5,-0.25089377653599,0.034228633511811,258160.73915601 85 | 84,0,32,0,-0.25882938551903,0.035521016081795,261648.22523189 86 | 85,-31,71,1,-0.26667385280132,0.036877594493562,264815.68217468 87 | 
86,-36.142857142857,266,7,-0.26922864258289,0.035662436407525,268298.59088683 88 | 87,-36,245,6,-0.26433752679825,0.038297658568248,271804.28529191 89 | 88,-30.875,253,8,-0.26904065239429,0.038530012675561,274966.89393377 90 | 89,0,12,0,-0.2529178994894,0.03857961513754,278135.46529984 91 | 90,0,30,0,-0.25320659327507,0.036184652519412,281300.25560284 92 | 91,-35,257,7,-0.2507867937088,0.037512870031409,284785.61747599 93 | 92,-37.857142857143,279,7,-0.24493289411068,0.038173165585846,288264.99663019 94 | 93,0,8,0,-0.28029315757751,0.036752135819988,291439.21841502 95 | 94,0,11,0,-0.28757838642597,0.037329511399381,294786.05089426 96 | 95,0,37,0,-0.26499050378799,0.03641091414704,298514.9797523 97 | 96,0,14,0,-0.27337732851505,0.037253945605364,302185.86876249 98 | 97,-33.5,91,2,-0.26746787285805,0.03588946757419,305871.62223148 99 | 98,0,7,0,-0.27868657195568,0.03872905414924,309507.54011035 100 | -------------------------------------------------------------------------------- /plots/Pong2Player_history_B.csv: -------------------------------------------------------------------------------- 1 | Epoch,Average reward,Reward count,Episode count,MeanQ,TD Error,Seconds 2 | 1,-26.458333333333,654,24,-0.10316917291284,0.03171840660274,0 3 | 2,-24.260869565217,571,23,-0.21664407390356,0.020301910638809,2219.7107670307 4 | 3,-30.823529411765,535,17,-0.31796842944622,0.031760104954243,5234.8550970554 5 | 4,-29.375,719,24,-0.39784982442856,0.03718723988533,8274.6327157021 6 | 5,-27.578947368421,542,19,-0.49098414742947,0.046165034890175,11343.717128754 7 | 6,-27.289473684211,1038,38,-0.54109015369415,0.054801562190056,14424.585474968 8 | 7,-28.514285714286,1008,35,-0.68634745073318,0.046342434048653,17527.729490757 9 | 8,-22.910714285714,1294,56,-0.78734533989429,0.052283799111843,20646.920996666 10 | 9,-26.333333333333,573,21,-0.96680954873562,0.050795489311218,23793.418364763 11 | 10,-21.958333333333,537,24,-0.94446024823189,0.047748475790024,26948.615488768 12 | 
11,-28.705882352941,508,17,-0.90576515161991,0.046088018536568,30113.221090794 13 | 12,-31.333333333333,98,3,-0.88200869899988,0.049653822243214,33269.140145779 14 | 13,-30.9375,505,16,-0.8679031342864,0.052557094037533,36434.461924553 15 | 14,-28.705882352941,507,17,-0.87953285455704,0.05192302185297,39919.898360729 16 | 15,-28.705882352941,507,17,-0.79656422615051,0.055768653512001,43082.808995724 17 | 16,-36.818181818182,419,11,-0.7973798545599,0.053919546604156,46241.313314676 18 | 17,-37,394,10,-0.79324904960394,0.056587139368057,49401.823321819 19 | 18,-36.454545454545,417,11,-0.75869192743301,0.056097362339497,52556.488364697 20 | 19,-36.181818181818,403,11,-0.72147205990553,0.056252910256386,55721.547684669 21 | 20,-34.727272727273,391,11,-0.69017216366529,0.050779387712479,58879.465076685 22 | 21,-36.666666666667,366,9,-0.70132182663679,0.048130309581757,62042.817330837 23 | 22,-30.636363636364,359,11,-0.68402341276407,0.047746060490608,65203.508159876 24 | 23,-34,365,10,-0.62059300380945,0.047794467508793,68371.994807005 25 | 24,-37.888888888889,351,9,-0.56811383879185,0.044462095856667,71532.411481619 26 | 25,-34.555555555556,334,9,-0.54428851336241,0.049049448788166,74696.672069788 27 | 26,-35.666666666667,341,9,-0.53194153410196,0.042404607594013,77855.706784964 28 | 27,-33.777777777778,331,9,-0.52397324371338,0.044501958101988,81024.021785021 29 | 28,-37.125,308,8,-0.49280105501413,0.043603889077902,84182.163150072 30 | 29,-32.818181818182,388,11,-0.45164683359861,0.048712482556701,87344.461227179 31 | 30,-34,317,9,-0.45899818623066,0.046882577434182,90501.760588169 32 | 31,-35.111111111111,334,9,-0.42163554221392,0.041218542262912,93669.918260336 33 | 32,-33.7,345,10,-0.41124828916788,0.042786059759557,96827.402169228 34 | 33,-37.875,320,8,-0.36733940595388,0.042581390786916,99991.178019285 35 | 34,-34.75,300,8,-0.36165858471394,0.041615742020309,103149.88645959 36 | 35,-35.142857142857,273,7,-0.36283807462454,0.04286250728555,106317.52060747 37 | 
36,-35,316,9,-0.35213853293657,0.04135294483602,109477.61490846 38 | 37,-31.444444444444,304,9,-0.37873019325733,0.039976343035698,112644.04946756 39 | 38,-35,325,9,-0.37086350005865,0.045977050364017,115803.35801244 40 | 39,-34,308,9,-0.387158010602,0.04341797137633,118968.0033195 41 | 40,-36.625,309,8,-0.38804112797976,0.041255482595414,122125.33657956 42 | 41,-38.142857142857,292,7,-0.37829089355469,0.042000484826975,125288.71097755 43 | 42,-34.25,295,8,-0.41269392293692,0.043908545061946,128449.8854835 44 | 43,-36.428571428571,284,7,-0.35951180899143,0.040556112715974,131617.50346541 45 | 44,-36.875,297,8,-0.32541014826298,0.040843158771982,134780.73201251 46 | 45,-34.875,289,8,-0.34633871275187,0.040638292525196,137949.28987861 47 | 46,-33.75,277,8,-0.32167219752073,0.040835269892123,141109.40660763 48 | 47,-35.25,289,8,-0.33032851690054,0.040877664363012,144275.81889558 49 | 48,-35.25,307,8,-0.33778543055058,0.040645833931863,147435.31064677 50 | 49,-39.571428571429,282,7,-0.35659436476231,0.042142257492058,150603.13879275 51 | 50,-36,273,7,-0.34156268209219,0.038646750929765,153765.27083468 52 | 51,-33.714285714286,267,7,-0.33958718246222,0.04115854826197,156934.99299765 53 | 52,-36.428571428571,279,7,-0.33137782621384,0.040307806527708,160095.28643036 54 | 53,-34.875,289,8,-0.30994284754992,0.041334426323883,163261.62074041 55 | 54,-35.75,288,8,-0.29656387126446,0.040181493939599,166420.72678161 56 | 55,-36.142857142857,276,7,-0.29876182216406,0.041037688679993,169588.44667172 57 | 56,-36.714285714286,286,7,-0.30332996743917,0.040273162252735,172748.50242591 58 | 57,-37.285714285714,285,7,-0.31741343575716,0.03937012092676,175910.684515 59 | 58,-35.428571428571,270,7,-0.32956989979744,0.042279763463885,179070.61611986 60 | 59,-36.285714285714,279,7,-0.33301015365124,0.04055048728548,182233.66440177 61 | 60,-34.25,298,8,-0.3283772597909,0.039634348405059,185392.0522368 62 | 61,-38,279,7,-0.32743104177713,0.043790440815501,188557.70815563 63 | 
62,-36.142857142857,286,7,-0.32915987682343,0.04203778380272,191718.52969551 64 | 63,-33.875,295,8,-0.36024566584826,0.042172495619394,194886.45836949 65 | 64,-31,261,8,-0.35764328289032,0.040637518398464,198048.08661747 66 | 65,-37.857142857143,280,7,-0.34844648963213,0.040825937362388,201217.75610328 67 | 66,-37.714285714286,284,7,-0.33217490136623,0.040230352470651,204377.43602824 68 | 67,-36.714285714286,273,7,-0.32567139828205,0.041850845268928,207544.57766247 69 | 68,-36.428571428571,287,7,-0.29867742705345,0.03995036158897,210706.8551352 70 | 69,-36,273,7,-0.27895277392864,0.041134778410196,213871.46873212 71 | 70,-35.5,244,6,-0.28795940876007,0.041708505763207,217029.25653219 72 | 71,-36.428571428571,266,7,-0.29409499913454,0.037932520796545,220194.61400127 73 | 72,-35,275,7,-0.30484864252806,0.036504096915596,223354.55645609 74 | 73,-32.5,275,8,-0.30799613463879,0.043229171217419,226520.75250101 75 | 74,-36.285714285714,265,7,-0.29752028590441,0.043337372859009,229680.09711981 76 | 75,-32.125,291,8,-0.30138940262794,0.044016310835257,232846.38811898 77 | 76,-33.571428571429,266,7,-0.29435864454508,0.043229652024806,236005.74047184 78 | 77,-35.666666666667,239,6,-0.31954347169399,0.040958518084604,239172.54759908 79 | 78,-39.333333333333,269,6,-0.30283958315849,0.042423107903916,242333.03757191 80 | 79,-32.375,276,8,-0.29783964937925,0.039569395420607,245498.53613424 81 | 80,-36.428571428571,270,7,-0.27939842480421,0.041614939747378,248660.04212308 82 | 81,-35.571428571429,267,7,-0.27845881319046,0.041564103236422,251829.45792484 83 | 82,-39,49,1,-0.29774576640129,0.041946764576249,254992.96653295 84 | 83,-36.8,201,5,-0.29520063459873,0.041044882636517,258160.73915601 85 | 84,0,32,0,-0.30711828100681,0.03780022122385,261648.22523189 86 | 85,-31,71,1,-0.28975769090652,0.041091928943992,264815.68217468 87 | 86,-36.142857142857,266,7,-0.27943712234497,0.038912671593949,268298.59088683 88 | 87,-36,245,6,-0.25322439110279,0.039827098444104,271804.28529191 89 | 
88,-30.875,253,8,-0.28580475902557,0.040129517577589,274966.89393377 90 | 89,0,12,0,-0.30222401273251,0.040915508701699,278135.46529984 91 | 90,0,30,0,-0.31114657723904,0.040820374908857,281300.25560284 92 | 91,-35,257,7,-0.30451617670059,0.039868107420392,284785.61747599 93 | 92,-37.857142857143,279,7,-0.32563604104519,0.038739177445881,288264.99663019 94 | 93,0,8,0,-0.34277233123779,0.041662729106494,291439.21841502 95 | 94,0,11,0,-0.34442797040939,0.038347307886463,294786.05089426 96 | 95,0,37,0,-0.32848795676231,0.040192691942677,298514.9797523 97 | 96,0,14,0,-0.31607774305344,0.038031559703173,302185.86876249 98 | 97,-33.5,91,2,-0.32555533874035,0.040140081637539,305871.62223148 99 | 98,0,7,0,-0.30362955224514,0.039050936597399,309507.54011035 100 | -------------------------------------------------------------------------------- /plots/Pong2Player_meanq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_meanq.png -------------------------------------------------------------------------------- /plots/Pong2Player_reward_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_reward_counts.png -------------------------------------------------------------------------------- /plots/Pong2Player_tderror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_tderror.png -------------------------------------------------------------------------------- /plots/Pong2Player_time.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/Pong2Player_time.png -------------------------------------------------------------------------------- /plots/plot.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib as mpl 5 | 6 | csv_files = [ 7 | "Pong2Player.csv", 8 | "Pong2PlayerVS.csv", 9 | # "Pong2Player05p.csv", 10 | ] 11 | 12 | labels = [ 13 | "Cooperative", 14 | "Competitive", 15 | # "rho = 0.5", 16 | ] 17 | 18 | suffixes = [ 19 | "cooperative", 20 | "competitive", 21 | # "05p", 22 | ] 23 | 24 | data = [] 25 | for i, file_name in enumerate(csv_files): 26 | data.append(dict()) 27 | with open(file_name, 'rb') as csv_file: 28 | csv_reader = csv.reader(csv_file, delimiter=";") 29 | csv_reader.next() # skip first row 30 | for row in csv_reader: 31 | if not row[0].isdigit(): 32 | continue 33 | epoch = int(row[0]) 34 | if epoch not in data[i].keys(): 35 | data[i][epoch] = [] 36 | wallbounces = float(row[2]) 37 | sidebounces = float(row[3]) 38 | points = float(row[4]) 39 | servingtime = float(row[5]) 40 | data[i][epoch].append(( 41 | sidebounces / points, 42 | wallbounces / (wallbounces if sidebounces == 0 else sidebounces), 43 | servingtime / points / 4 44 | )) 45 | 46 | epochs = [] 47 | means = [] 48 | stds = [] 49 | for i, file_name in enumerate(csv_files): 50 | epochs.append([]) 51 | means.append([]) 52 | stds.append([]) 53 | for epoch in sorted(data[i].keys()): 54 | epochs[i].append(int(epoch)) 55 | a = np.array(data[i][epoch]) 56 | mean = np.mean(a, axis=0) 57 | means[i].append(mean) 58 | std = np.std(a, axis=0) 59 | stds[i].append(std) 60 | 61 | epochs = [np.array(a) for a in epochs] 62 | means = [np.array(a) for a in means] 63 | stds = [np.array(a) for a in stds] 64 | 65 | 
# Global matplotlib styling applied to the figures drawn below.
mpl.rcParams['lines.linewidth'] = 2
mpl.rcParams['xtick.labelsize'] = 'small'
mpl.rcParams['ytick.labelsize'] = 'small'
mpl.rcParams['axes.labelsize'] = 'small'
mpl.rcParams['legend.fontsize'] = 'small'

dpi = 300
plt.figure(figsize=(4,3))

# Figure 1: column 1 of the per-epoch means (wall-bounces per paddle-bounce),
# one curve per entry of csv_files. The first 10 entries of each series are
# left out of the plot.
for i, file_name in enumerate(csv_files):
    #plt.errorbar(epochs[i][10:], means[i][10:,1], yerr=stds[i][10:,1])
    plt.plot(epochs[i][10:], means[i][10:,1])
plt.locator_params(axis='y', nbins=5)
plt.ylabel("Wall-bounces per paddle-bounce")
plt.xlabel("Epoch")
# The legend sits above the axes, so it is handed to savefig as an extra
# artist to keep bbox_inches='tight' from cropping it.
lgd = plt.legend(labels, loc="lower center", bbox_to_anchor=(0.43, 1.),
                 ncol=2, columnspacing=1, frameon=False)
plt.tight_layout()
plt.savefig('wallbounces_history.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight')

# Figure 2: column 0 of the per-epoch means (paddle-bounces per point) drawn
# on a "broken" y-axis: the same curves are plotted on two stacked axes that
# show different y-ranges (16..400 on top, 0..11 at the bottom).
plt.clf()
#f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(4,3))
#ax = plt.subplot2grid((2,2), (0,0), rowspan=2)
#ax1 = plt.subplot2grid((2,2), (0,1))
#ax2 = plt.subplot2grid((2,2), (1,1))
f = plt.figure(figsize=(4,3))
ax = f.add_subplot(111)    # The big subplot
ax1 = f.add_subplot(211)
ax2 = f.add_subplot(212)
for i, file_name in enumerate(csv_files):
    #plt.errorbar(epochs[i], means[i][:,1], yerr=stds[i][:,1])
    ax1.plot(epochs[i], means[i][:,0])
    ax2.plot(epochs[i], means[i][:,0])

ax1.set_ylim(16, 400)
ax2.set_ylim(0, 11)
# Hide the spines where the two axes meet so they read as one broken axis.
ax1.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax1.xaxis.tick_top()
# Use a real boolean: the legacy string 'off' is a non-empty (truthy) string,
# so matplotlib releases that no longer translate 'on'/'off' would show the
# top tick labels instead of hiding them.
ax1.tick_params(labeltop=False)  # don't put tick labels at the top
ax2.xaxis.tick_bottom()
ax1.locator_params(axis='y', nbins=3)
ax2.locator_params(axis='y', nbins=3)

d = .015  # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax1.transAxes, color='k', clip_on=False)
ax1.plot((-d, +d), (-d, +d), **kwargs)
# top-left diagonal 113 | ax1.plot((1 - d, 1 + d), (-d, +d), **kwargs) # top-right diagonal 114 | 115 | kwargs.update(transform=ax2.transAxes) # switch to the bottom axes 116 | ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs) # bottom-left diagonal 117 | ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs) # bottom-right diagonal 118 | 119 | ax.spines['top'].set_color('none') 120 | ax.spines['bottom'].set_color('none') 121 | ax.spines['left'].set_color('none') 122 | ax.spines['right'].set_color('none') 123 | ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off') 124 | 125 | ax.set_ylabel("Paddle-bounces per point", labelpad=10) 126 | ax.set_xlabel("Epoch") 127 | #ax = plt.gca() 128 | #ax.set_yscale("log") 129 | lgd = ax1.legend(labels, loc="lower center", bbox_to_anchor=(0.43, 1.), 130 | ncol=2, columnspacing=1, frameon=False) 131 | plt.tight_layout() 132 | f.subplots_adjust(hspace=0.1) 133 | plt.savefig('sidebounces_history.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight') 134 | 135 | plt.clf() 136 | for i, file_name in enumerate(csv_files): 137 | #plt.errorbar(epochs[i], means[i][:,2], yerr=stds[i][:,2]) 138 | plt.plot(epochs[i], means[i][:,2]) 139 | plt.locator_params(axis='y', nbins=5) 140 | plt.ylabel("Serving time per point") 141 | plt.xlabel("Epoch") 142 | lgd = plt.legend(labels, loc="lower center", bbox_to_anchor=(0.41, 1.), 143 | ncol=2, columnspacing=1, frameon=False) 144 | plt.tight_layout() 145 | plt.savefig('serving_time_history.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight') 146 | 147 | for i, file_name in enumerate(csv_files): 148 | plt.clf() 149 | plt.plot(epochs[i][10:], means[i][10:,1]) 150 | plt.locator_params(axis='y', nbins=5) 151 | plt.ylabel("Wall-bounces per paddle-bounce") 152 | plt.xlabel("Epoch") 153 | plt.tight_layout() 154 | plt.savefig('wallbounces_history_%s.png' % suffixes[i], dpi=dpi) 155 | 156 | plt.clf() 157 | plt.plot(epochs[i], means[i][:,0]) 158 | plt.locator_params(axis='y', 
nbins=5) 159 | plt.ylabel("Paddle-bounces per point") 160 | plt.xlabel("Epoch") 161 | #ax = plt.gca() 162 | #ax.set_yscale("log") 163 | #plt.ylim(ymin=-10) 164 | plt.tight_layout() 165 | plt.savefig('sidebounces_history_%s.png' % suffixes[i], dpi=dpi) 166 | 167 | plt.clf() 168 | plt.plot(epochs[i], means[i][:,2]) 169 | plt.locator_params(axis='y', nbins=5) 170 | plt.ylabel("Serving time per point") 171 | plt.xlabel("Epoch") 172 | plt.tight_layout() 173 | plt.savefig('serving_time_history_%s.png' % suffixes[i], dpi=dpi) 174 | -------------------------------------------------------------------------------- /plots/plot_history.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib as mpl 5 | 6 | csv_files = [ 7 | "Pong2PlayerVS", 8 | "Pong2Player075p", 9 | "Pong2Player05p", 10 | "Pong2Player025p", 11 | "Pong2Player0", 12 | "Pong2Player025", 13 | "Pong2Player05", 14 | "Pong2Player075", 15 | "Pong2Player", 16 | ] 17 | 18 | mpl.rcParams['lines.linewidth'] = 2 19 | mpl.rcParams['xtick.labelsize'] = 'small' 20 | mpl.rcParams['ytick.labelsize'] = 'small' 21 | mpl.rcParams['axes.labelsize'] = 'small' 22 | mpl.rcParams['legend.fontsize'] = 'small' 23 | 24 | dpi = 300 25 | plt.figure(figsize=(4,3)) 26 | 27 | data = [] 28 | for i, file_name in enumerate(csv_files): 29 | print file_name 30 | data_a = np.loadtxt(file_name + "_history_A.csv", delimiter = ",", skiprows = 1) 31 | data_b = np.loadtxt(file_name + "_history_B.csv", delimiter = ",", skiprows = 1) 32 | 33 | plt.clf() 34 | plt.plot(data_a[:,0] / 2, data_a[:,1]) 35 | plt.plot(data_b[:,0] / 2, data_b[:,1]) 36 | plt.ylabel("Average reward") 37 | plt.xlabel("Epoch") 38 | lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.), 39 | ncol=2, columnspacing=1, frameon=False) 40 | #plt.tight_layout() 41 | plt.savefig(file_name + '_average_reward.png', dpi=dpi, 
bbox_extra_artists=(lgd,), bbox_inches='tight') 42 | 43 | plt.clf() 44 | plt.plot(data_a[:,0] / 2, data_a[:,2]) 45 | plt.plot(data_b[:,0] / 2, data_b[:,2]) 46 | plt.ylabel("Reward count") 47 | plt.xlabel("Epoch") 48 | lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.), 49 | ncol=2, columnspacing=1, frameon=False) 50 | #plt.tight_layout() 51 | plt.savefig(file_name + '_reward_counts.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight') 52 | 53 | plt.clf() 54 | plt.plot(data_a[:,0] / 2, data_a[:,3]) 55 | plt.plot(data_b[:,0] / 2, data_b[:,3]) 56 | plt.ylabel("Episode count") 57 | plt.xlabel("Epoch") 58 | lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.), 59 | ncol=2, columnspacing=1, frameon=False) 60 | #plt.tight_layout() 61 | plt.savefig(file_name + '_episode_counts.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight') 62 | 63 | plt.clf() 64 | plt.plot(data_a[:,0] / 2, data_a[:,4]) 65 | plt.plot(data_b[:,0] / 2, data_b[:,4]) 66 | plt.ylabel("Mean Q-value") 67 | plt.xlabel("Epoch") 68 | lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.), 69 | ncol=2, columnspacing=1, frameon=False) 70 | #plt.tight_layout() 71 | plt.savefig(file_name + '_meanq.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight') 72 | 73 | plt.clf() 74 | plt.plot(data_a[:,0] / 2, data_a[:,5]) 75 | plt.plot(data_b[:,0] / 2, data_b[:,5]) 76 | plt.ylabel("TD error") 77 | plt.xlabel("Epoch") 78 | lgd = plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.), 79 | ncol=2, columnspacing=1, frameon=False) 80 | #plt.tight_layout() 81 | plt.savefig(file_name + '_tderror.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight') 82 | 83 | plt.clf() 84 | plt.plot(data_a[:,0] / 2, data_a[:,6]) 85 | plt.plot(data_b[:,0] / 2, data_b[:,6]) 86 | plt.ylabel("Time (seconds)") 87 | plt.xlabel("Epoch") 88 | lgd = 
plt.legend(["Right player", "Left player"], loc="lower center", bbox_to_anchor=(0.41, 1.), 89 | ncol=2, columnspacing=1, frameon=False) 90 | #plt.tight_layout() 91 | plt.savefig(file_name + '_time.png', dpi=dpi, bbox_extra_artists=(lgd,), bbox_inches='tight') 92 | -------------------------------------------------------------------------------- /plots/scatter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import csv 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import matplotlib as mpl 7 | 8 | csv_files = [ 9 | ("Pong2PlayerVS.csv", "Competitive $\rho=1$"), 10 | ("Pong2Player075p.csv", "Transition $\rho=0.75$"), 11 | ("Pong2Player05p.csv", "Transition $\rho=0.5$"), 12 | ("Pong2Player025p.csv", "Transition $\rho=0.25$"), 13 | ("Pong2Player0.csv", "Transition $\rho=0$"), 14 | ("Pong2Player025.csv", "Transition $\rho=-0.25$"), 15 | ("Pong2Player05.csv", "Transition $\rho=-0.5$"), 16 | ("Pong2Player075.csv", "Transition $\rho=-0.75$"), 17 | ("Pong2Player.csv", "Cooperative $\rho=-1$"), 18 | ] 19 | 20 | labels = [ 21 | r'$\rho = 1$' + '\n(competitive)', 22 | r'$0.75$', 23 | r'$0.5$', 24 | r'$0.25$', 25 | r'$0$', 26 | r'$-0.25$', 27 | r'$-0.5$', 28 | r'$-0.75$', 29 | r'$-1$' + '\n(cooperative)', 30 | ] 31 | 32 | sideBouncePerPoint = [] 33 | wallBouncePerSideBounce = [] 34 | avgServingTime = [] 35 | for csv_file, label in csv_files: 36 | with open(csv_file, 'rb') as input: 37 | csv_reader = csv.reader(input, delimiter=";") 38 | csv_reader.next() # skip first row 39 | 40 | sideBounce = [] 41 | wallBounce = [] 42 | points = [] 43 | servingTime = [] 44 | 45 | for values in csv_reader: 46 | if not values[0].isdigit(): 47 | continue 48 | if int(values[0]) != 49: 49 | continue 50 | 51 | wallBounce.append(float(values[2])) 52 | sideBounce.append(float(values[3])) 53 | points.append(float(values[4])) 54 | servingTime.append(float(values[5])) 55 | 56 | sideBouncePerPoint.append(map(lambda x, y: x / 
y, sideBounce, points)) 57 | wallBouncePerSideBounce.append(map(lambda x, y: x / y, wallBounce, sideBounce)) 58 | avgServingTime.append(map(lambda x, y: x / y / 4, servingTime, points)) 59 | 60 | sideBouncePerPoint = np.array(sideBouncePerPoint) 61 | wallBouncePerSideBounce = np.array(wallBouncePerSideBounce) 62 | avgServingTime = np.array(avgServingTime) 63 | x = range(sideBouncePerPoint.shape[0]) * sideBouncePerPoint.shape[1] 64 | #x += 0.02 * np.random.randn(len(x)) 65 | 66 | mpl.rcParams['lines.linewidth'] = 2 67 | mpl.rcParams['xtick.labelsize'] = 'small' 68 | mpl.rcParams['ytick.labelsize'] = 'small' 69 | mpl.rcParams['axes.labelsize'] = 'small' 70 | mpl.rcParams['legend.fontsize'] = 'small' 71 | 72 | dpi = 300 73 | plt.figure(figsize=(4.6,3.45)) 74 | 75 | plt.scatter(x, sideBouncePerPoint.T.ravel(), alpha = 0.5) 76 | plt.ylabel("Paddle-bounces per point") 77 | #ax = plt.gca() 78 | #ax.set_yscale("log") 79 | plt.xticks(x, labels) 80 | plt.locator_params(axis='y', nbins=7) 81 | plt.xlim((-1, sideBouncePerPoint.shape[0])) 82 | plt.ylim(ymin=-400) 83 | plt.tight_layout() 84 | plt.savefig('sidebounces_per_point_scatter.png', dpi=dpi) 85 | 86 | plt.clf() 87 | plt.scatter(x, wallBouncePerSideBounce.T.ravel(), alpha = 0.5) 88 | plt.ylabel("Wall-bounces per paddle-bounce") 89 | #ax = plt.gca() 90 | #ax.set_yscale("log") 91 | plt.xticks(x, labels) 92 | plt.locator_params(axis='y', nbins=7) 93 | plt.xlim((-1, wallBouncePerSideBounce.shape[0])) 94 | plt.ylim(ymin=-0.1) 95 | plt.tight_layout() 96 | plt.savefig('wallbounces_per_sidebounce_scatter.png', dpi=dpi) 97 | 98 | plt.clf() 99 | plt.scatter(x, avgServingTime.T.ravel(), alpha = 0.5) 100 | plt.ylabel("Serving time per point") 101 | #ax = plt.gca() 102 | #ax.set_yscale("log") 103 | plt.xticks(x, labels) 104 | plt.locator_params(axis='y', nbins=7) 105 | plt.xlim((-1, avgServingTime.shape[0])) 106 | plt.ylim(ymin=-100) 107 | plt.tight_layout() 108 | plt.savefig('serving_time_per_point_scatter.png', dpi=dpi) 109 | 
-------------------------------------------------------------------------------- /plots/serving_time_history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_history.png -------------------------------------------------------------------------------- /plots/serving_time_history_competitive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_history_competitive.png -------------------------------------------------------------------------------- /plots/serving_time_history_cooperative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_history_cooperative.png -------------------------------------------------------------------------------- /plots/serving_time_per_point.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_per_point.png -------------------------------------------------------------------------------- /plots/serving_time_per_point_scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/serving_time_per_point_scatter.png -------------------------------------------------------------------------------- /plots/sidebounces_history.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_history.png -------------------------------------------------------------------------------- /plots/sidebounces_history_competitive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_history_competitive.png -------------------------------------------------------------------------------- /plots/sidebounces_history_cooperative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_history_cooperative.png -------------------------------------------------------------------------------- /plots/sidebounces_per_point.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_per_point.png -------------------------------------------------------------------------------- /plots/sidebounces_per_point_scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/sidebounces_per_point_scatter.png -------------------------------------------------------------------------------- /plots/stats.tex: -------------------------------------------------------------------------------- 1 | Competitive $\rho=1$ & $ 7.15 \pm 1.01 $ & $ 0.87 \pm 0.08 $ & $ 113.87 \pm 40.30 $ \\ 2 | Transition $\rho=0.75$ & $ 
7.58 \pm 0.71 $ & $ 0.83 \pm 0.06 $ & $ 129.03 \pm 38.81 $ \\ 3 | Transition $\rho=0.5$ & $ 6.93 \pm 0.49 $ & $ 0.64 \pm 0.03 $ & $ 147.69 \pm 41.02 $ \\ 4 | Transition $\rho=0.25$ & $ 4.49 \pm 0.43 $ & $ 1.11 \pm 0.07 $ & $ 275.90 \pm 38.69 $ \\ 5 | Transition $\rho=0$ & $ 4.31 \pm 0.25 $ & $ 0.78 \pm 0.05 $ & $ 407.64 \pm 100.79 $ \\ 6 | Transition $\rho=-0.25$ & $ 5.21 \pm 0.36 $ & $ 0.60 \pm 0.05 $ & $ 449.18 \pm 99.53 $ \\ 7 | Transition $\rho=-0.5$ & $ 6.20 \pm 0.20 $ & $ 0.38 \pm 0.04 $ & $ 433.39 \pm 98.77 $ \\ 8 | Transition $\rho=-0.75$ & $ 409.50 \pm 535.24 $ & $ 0.02 \pm 0.01 $ & $ 591.62 \pm 302.15 $ \\ 9 | Cooperative $\rho=-1$ & $ 654.66 \pm 542.67 $ & $ 0.01 \pm 0.00 $ & $ 393.34 \pm 138.63 $ \\ 10 | -------------------------------------------------------------------------------- /plots/table.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import csv 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import matplotlib as mpl 7 | 8 | csv_files = [ 9 | ("Pong2PlayerVS.csv", "Competitive $\\rho=1$"), 10 | ("Pong2Player075p.csv", "Transition $\\rho=0.75$"), 11 | ("Pong2Player05p.csv", "Transition $\\rho=0.5$"), 12 | ("Pong2Player025p.csv", "Transition $\\rho=0.25$"), 13 | ("Pong2Player0.csv", "Transition $\\rho=0$"), 14 | ("Pong2Player025.csv", "Transition $\\rho=-0.25$"), 15 | ("Pong2Player05.csv", "Transition $\\rho=-0.5$"), 16 | ("Pong2Player075.csv", "Transition $\\rho=-0.75$"), 17 | ("Pong2Player.csv", "Cooperative $\\rho=-1$"), 18 | ] 19 | 20 | labels = [ 21 | r'$\rho = 1$' + '\n(competitive)', 22 | r'$0.75$', 23 | r'$0.5$', 24 | r'$0.25$', 25 | r'$0$', 26 | r'$-0.25$', 27 | r'$-0.5$', 28 | r'$-0.75$', 29 | r'$-1$' + '\n(cooperative)', 30 | ] 31 | 32 | stats = [] 33 | with open("stats.tex","w") as output: 34 | #output.write("\hline\n") 35 | #output.write("Agent & Average paddle-bounces per point & Average wall-bounces per paddle-bounce & Average serving time per point 
\\\\\n") 36 | #output.write("\hline\n") 37 | for csv_file, label in csv_files: 38 | with open(csv_file, 'rb') as input: 39 | csv_reader = csv.reader(input, delimiter=";") 40 | csv_reader.next() # skip first row 41 | 42 | sideBounce = [] 43 | wallBounce = [] 44 | points = [] 45 | servingTime = [] 46 | 47 | for values in csv_reader: 48 | if not values[0].isdigit(): 49 | continue 50 | if int(values[0]) != 49: 51 | continue 52 | 53 | wallBounce.append(float(values[2])) 54 | sideBounce.append(float(values[3])) 55 | points.append(float(values[4])) 56 | servingTime.append(float(values[5])) 57 | 58 | sideBouncePerPoint = map(lambda x, y: x / y, sideBounce, points) 59 | wallBouncePerSideBounce = map(lambda x, y: x / y, wallBounce, sideBounce) 60 | avgServingTime = map(lambda x, y: x / y / 4, servingTime, points) 61 | 62 | data = (label, np.mean(sideBouncePerPoint), np.std(sideBouncePerPoint), 63 | np.mean(wallBouncePerSideBounce), np.std(wallBouncePerSideBounce), 64 | np.mean(avgServingTime), np.std(avgServingTime)) 65 | stats.append(data[1:]) 66 | 67 | output.write("%s & $ %.2f \pm %.2f $ & $ %.2f \pm %.2f $ & $ %.2f \pm %.2f $ \\\\\n" % data) 68 | #output.write("\hline\n") 69 | 70 | stats = np.array(stats) 71 | x = range(1, stats.shape[0] + 1) 72 | 73 | mpl.rcParams['lines.linewidth'] = 2 74 | mpl.rcParams['xtick.labelsize'] = 'small' 75 | mpl.rcParams['ytick.labelsize'] = 'small' 76 | mpl.rcParams['axes.labelsize'] = 'small' 77 | mpl.rcParams['legend.fontsize'] = 'small' 78 | 79 | dpi = 300 80 | plt.figure(figsize=(4,3)) 81 | 82 | plt.errorbar(x, stats[:,0], yerr=stats[:,1]) 83 | plt.ylabel("Paddle-bounces per point") 84 | #ax = plt.gca() 85 | #ax.set_yscale("log") 86 | plt.xticks(x, labels) 87 | plt.locator_params(axis='y', nbins=5) 88 | plt.xlim((0, len(x) + 1)) 89 | plt.tight_layout() 90 | plt.savefig('sidebounces_per_point.png', dpi=dpi) 91 | 92 | plt.clf() 93 | plt.errorbar(x, stats[:,2], yerr=stats[:,3]) 94 | plt.ylabel("Wall-bounces per paddle-bounce") 95 | #ax = 
plt.gca() 96 | #ax.set_yscale("log") 97 | plt.xticks(x, labels) 98 | plt.locator_params(axis='y', nbins=5) 99 | plt.xlim((0, len(x) + 1)) 100 | plt.tight_layout() 101 | plt.savefig('wallbounces_per_sidebounce.png', dpi=dpi) 102 | 103 | plt.clf() 104 | plt.errorbar(x, stats[:,4], yerr=stats[:,5]) 105 | plt.ylabel("Serving time per point") 106 | #ax = plt.gca() 107 | #ax.set_yscale("log") 108 | plt.xticks(x, labels) 109 | plt.locator_params(axis='y', nbins=5) 110 | plt.xlim((0, len(x) + 1)) 111 | plt.tight_layout() 112 | plt.savefig('serving_time_per_point.png', dpi=dpi) 113 | -------------------------------------------------------------------------------- /plots/wallbounces_history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_history.png -------------------------------------------------------------------------------- /plots/wallbounces_history_competitive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_history_competitive.png -------------------------------------------------------------------------------- /plots/wallbounces_history_cooperative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_history_cooperative.png -------------------------------------------------------------------------------- /plots/wallbounces_per_sidebounce.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_per_sidebounce.png -------------------------------------------------------------------------------- /plots/wallbounces_per_sidebounce_scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/plots/wallbounces_per_sidebounce_scatter.png -------------------------------------------------------------------------------- /roms/Pong2Player.bin: -------------------------------------------------------------------------------- 1 | pong.bin -------------------------------------------------------------------------------- /roms/Pong2Player0.bin: -------------------------------------------------------------------------------- 1 | pong.bin -------------------------------------------------------------------------------- /roms/Pong2Player025.bin: -------------------------------------------------------------------------------- 1 | pong.bin -------------------------------------------------------------------------------- /roms/Pong2Player025p.bin: -------------------------------------------------------------------------------- 1 | pong.bin -------------------------------------------------------------------------------- /roms/Pong2Player05.bin: -------------------------------------------------------------------------------- 1 | pong.bin -------------------------------------------------------------------------------- /roms/Pong2Player05p.bin: -------------------------------------------------------------------------------- 1 | pong.bin -------------------------------------------------------------------------------- /roms/Pong2Player075.bin: -------------------------------------------------------------------------------- 1 | pong.bin 
-------------------------------------------------------------------------------- /roms/Pong2Player075p.bin: -------------------------------------------------------------------------------- 1 | pong.bin -------------------------------------------------------------------------------- /roms/Pong2PlayerVS.bin: -------------------------------------------------------------------------------- 1 | pong.bin -------------------------------------------------------------------------------- /roms/README: -------------------------------------------------------------------------------- 1 | Rom files should be put in this directory 2 | -------------------------------------------------------------------------------- /roms/breakout.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/roms/breakout.bin -------------------------------------------------------------------------------- /roms/breakout_2player.bin: -------------------------------------------------------------------------------- 1 | breakout.bin -------------------------------------------------------------------------------- /roms/pong.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/roms/pong.bin -------------------------------------------------------------------------------- /roms/surround.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/roms/surround.bin -------------------------------------------------------------------------------- /roms/wizard_of_wor.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeuroCSUT/DeepMind-Atari-Deep-Q-Learner-2Player/feb0b8acd761eb47cca04f8a80c9c998c34d7a35/roms/wizard_of_wor.bin -------------------------------------------------------------------------------- /roms/wizard_of_wor_2player.bin: -------------------------------------------------------------------------------- 1 | wizard_of_wor.bin -------------------------------------------------------------------------------- /run_cpu: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then echo "Please provide the name of the game, e.g. ./run_cpu breakout "; exit 0 5 | fi 6 | ENV=$1 7 | FRAMEWORK="alewrap" 8 | 9 | game_path=$PWD"/roms/" 10 | env_params="useRGB=true" 11 | agent="NeuralQLearner" 12 | n_replay=1 13 | netfile="\"convnet_atari3\"" 14 | update_freq=4 15 | actrep=4 16 | discount=0.99 17 | seed=1 18 | learn_start=50000 19 | pool_frms_type="\"max\"" 20 | pool_frms_size=2 21 | initial_priority="false" 22 | replay_memory=1000000 23 | eps_end=0.1 24 | eps_endt=replay_memory 25 | lr=0.00025 26 | agent_type="DQN3_0_1" 27 | preproc_net="\"net_downsample_2x_full_y\"" 28 | agent_name=$agent_type"_"$1"_FULL_Y" 29 | state_dim=7056 30 | ncols=1 31 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1" 32 | steps=50000000 33 | eval_freq=250000 34 | eval_steps=125000 35 | prog_freq=5000 36 | save_freq=125000 37 | save_versions=$save_freq 38 | gpu=-1 39 | random_starts=30 40 | pool_frms="type="$pool_frms_type",size="$pool_frms_size 41 | num_threads=4 42 | 43 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -env $ENV 
-env_params $env_params -agent $agent -agent_params $agent_params -steps $steps -eval_freq $eval_freq -eval_steps $eval_steps -prog_freq $prog_freq -save_freq $save_freq -save_versions $save_versions -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads" 44 | echo $args 45 | 46 | cd dqn 47 | ../torch/bin/qlua train_agent.lua $args 48 | -------------------------------------------------------------------------------- /run_gpu: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then echo "Please provide the name of the game, e.g. ./run_gpu breakout "; exit 0 5 | fi 6 | ENV=$1 7 | FRAMEWORK="alewrap" 8 | 9 | game_path=$PWD"/roms/" 10 | env_params="useRGB=true" 11 | agent="NeuralQLearner" 12 | n_replay=1 13 | netfile="\"convnet_atari3\"" 14 | update_freq=4 15 | actrep=4 16 | discount=0.99 17 | seed=1 18 | learn_start=50000 19 | pool_frms_type="\"max\"" 20 | pool_frms_size=2 21 | initial_priority="false" 22 | replay_memory=1000000 23 | eps_end=0.1 24 | eps_endt=replay_memory 25 | lr=0.00025 26 | agent_type="DQN3_0_1" 27 | preproc_net="\"net_downsample_2x_full_y\"" 28 | agent_name=$agent_type"_"$1"_FULL_Y" 29 | state_dim=7056 30 | ncols=1 31 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1" 32 | steps=50000000 33 | eval_freq=250000 34 | eval_steps=125000 35 | prog_freq=10000 36 | save_freq=125000 37 | save_versions=$save_freq 38 | gpu=0 39 | random_starts=30 40 | pool_frms="type="$pool_frms_type",size="$pool_frms_size 41 | num_threads=4 42 | 43 | args="-framework $FRAMEWORK -game_path $game_path 
-name $agent_name -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -steps $steps -eval_freq $eval_freq -eval_steps $eval_steps -prog_freq $prog_freq -save_freq $save_freq -save_versions $save_versions -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads" 44 | echo $args 45 | 46 | cd dqn 47 | ../torch/bin/qlua train_agent.lua $args 48 | -------------------------------------------------------------------------------- /run_gpu2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then echo "Please provide the name of the game, e.g. ./run_gpu breakout "; exit 0 5 | fi 6 | ENV=$1 7 | FRAMEWORK="alewrap" 8 | 9 | game_path=$PWD"/roms/" 10 | env_params="useRGB=true" 11 | agent="NeuralQLearner" 12 | n_replay=1 13 | netfile="\"convnet_atari3\"" 14 | update_freq=4 15 | actrep=4 16 | discount=0.99 17 | seed=1 18 | learn_start=50000 19 | pool_frms_type="\"max\"" 20 | pool_frms_size=2 21 | initial_priority="false" 22 | replay_memory=1000000 23 | eps_end=0.1 24 | eps_endt=replay_memory 25 | lr=0.00025 26 | agent_type="DQN3_0_1" 27 | preproc_net="\"net_downsample_2x_full_y\"" 28 | agent_name=$agent_type"_"$1"_FULL_Y_A" 29 | agent_nameB=$agent_type"_"$1"_FULL_Y_B" 30 | state_dim=7056 31 | ncols=1 32 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1" 33 | steps=50000000 34 | eval_freq=125000 35 | eval_steps=62500 36 | prog_freq=10000 37 | save_freq=250000 38 | save_versions=$save_freq 39 | gpu=0 40 | gpuB=1 41 | random_starts=30 42 | 
pool_frms="type="$pool_frms_type",size="$pool_frms_size 43 | num_threads=4 44 | 45 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -steps $steps -eval_freq $eval_freq -eval_steps $eval_steps -prog_freq $prog_freq -save_freq $save_freq -save_versions $save_versions -actrep $actrep -gpu $gpu -gpuB $gpuB -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 0" 46 | if [ -n "$2" ]; then 47 | args="$args -network ${agent_name}_$2.t7 -networkB ${agent_nameB}_$2.t7" 48 | fi 49 | 50 | echo $args 51 | 52 | cd dqn 53 | ../torch/bin/qlua train_2agent.lua $args 54 | -------------------------------------------------------------------------------- /run_gpu2_resume: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then echo "Please provide the name of the game, e.g. ./run_gpu breakout "; exit 0 5 | fi 6 | ENV=$1 7 | FRAMEWORK="alewrap" 8 | 9 | game_path=$PWD"/roms/" 10 | env_params="useRGB=true" 11 | agent="NeuralQLearner" 12 | # number of times to perform learning during each step 13 | n_replay=1 14 | netfile="\"convnet_atari3\"" 15 | # perform learning after every 4 steps 16 | update_freq=4 17 | # how many times to repeat chosen action 18 | actrep=4 19 | # future reward discount 20 | discount=0.99 21 | # random seed used to initialize torch 22 | seed=1 23 | # start learning after this steps 24 | learn_start=512 25 | # ? 26 | pool_frms_type="\"max\"" 27 | pool_frms_size=2 28 | # not used? 
29 | initial_priority="false" 30 | # replay memory size (passed to the agent as replay_memory) 31 | replay_memory=1000000 32 | # final exploration rate (passed to the agent as ep_end) 33 | eps_end=0.1 34 | # number of steps over which the exploration rate decays (passed as ep_endt) 35 | eps_endt=0 36 | # learning rate (passed to the agent as lr) 37 | lr=0.00025 38 | agent_type="DQN3_0_1" 39 | preproc_net="\"net_downsample_2x_full_y\"" 40 | agent_name=$agent_type"_"$1"_FULL_Y_A" 41 | agent_nameB=$agent_type"_"$1"_FULL_Y_B" 42 | # state dimensionality: 84x84 = 7056 43 | state_dim=7056 44 | # number of color channels (1 = greyscale) 45 | ncols=1 46 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1" 47 | # total number of training steps 48 | steps=50000000 49 | # evaluation (testing) frequency 50 | eval_freq=125000 51 | # number of steps per evaluation 52 | eval_steps=62500 53 | # frequency of progress output 54 | prog_freq=10000 55 | # frequency of saving the network 56 | save_freq=250000 57 | # how often to save versions (will be used for calculating version numbers) 58 | save_versions=$save_freq 59 | # GPUs to use: gpu for the first player, gpuB for the second 60 | gpu=0 61 | gpuB=1 62 | random_starts=30 63 | pool_frms="type="$pool_frms_type",size="$pool_frms_size 64 | num_threads=4 65 | 66 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -steps $steps -eval_freq $eval_freq -eval_steps $eval_steps -prog_freq $prog_freq -save_freq $save_freq -save_versions $save_versions -actrep $actrep -gpu $gpu -gpuB $gpuB -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 0" 67 | if [ -n "$2" ]; then 68 | args="$args -network ${agent_name}_$2.t7 -networkB ${agent_nameB}_$2.t7" 69 | 
fi 70 | 71 | echo $args 72 | 73 | cd dqn 74 | ../torch/bin/qlua train_2agent.lua $args 75 | -------------------------------------------------------------------------------- /test_cpu: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then echo "Please provide the name of the game, e.g. ./run_gpu breakout []"; exit 0 5 | fi 6 | ENV=$1 7 | FRAMEWORK="alewrap" 8 | 9 | game_path=$PWD"/roms/" 10 | env_params="useRGB=true" 11 | agent="NeuralQLearner" 12 | n_replay=1 13 | netfile="\"convnet_atari3\"" 14 | update_freq=4 15 | actrep=4 16 | discount=0.99 17 | seed=1 18 | learn_start=0 19 | pool_frms_type="\"max\"" 20 | pool_frms_size=2 21 | initial_priority="false" 22 | replay_memory=1000000 23 | eps_end=0.1 24 | eps_endt=replay_memory 25 | lr=0.00025 26 | agent_type="DQN3_0_1" 27 | preproc_net="\"net_downsample_2x_full_y\"" 28 | agent_name=$agent_type"_"$1"_FULL_Y" 29 | if [ -z "$2" ]; then 30 | network_file="$agent_name.t7" 31 | gif_file="../sessions/$ENV.gif" 32 | csv_file="../sessions/$ENV.csv" 33 | else 34 | network_file="${agent_name}_$2.t7" 35 | gif_file="../sessions/${ENV}_$2.gif" 36 | csv_file="../sessions/${ENV}_$2.csv" 37 | fi 38 | state_dim=7056 39 | ncols=1 40 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1" 41 | gpu=-1 42 | random_starts=30 43 | pool_frms="type="$pool_frms_type",size="$pool_frms_size 44 | num_threads=4 45 | 46 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms 
$pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -gif_file $gif_file -csv_file $csv_file" 47 | echo $args 48 | 49 | cd dqn 50 | ../torch/bin/qlua test_agent.lua $args 51 | -------------------------------------------------------------------------------- /test_gpu: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then echo "Please provide the name of the game, e.g. ./test_gpu breakout []"; exit 0 5 | fi 6 | ENV=$1 7 | FRAMEWORK="alewrap" 8 | 9 | game_path=$PWD"/roms/" 10 | env_params="useRGB=true" 11 | agent="NeuralQLearner" 12 | n_replay=1 13 | netfile="\"convnet_atari3\"" 14 | update_freq=4 15 | actrep=4 16 | discount=0.99 17 | seed=1 18 | learn_start=0 19 | pool_frms_type="\"max\"" 20 | pool_frms_size=2 21 | initial_priority="false" 22 | replay_memory=1000000 23 | eps_end=0.1 24 | eps_endt=replay_memory 25 | lr=0.00025 26 | agent_type="DQN3_0_1" 27 | preproc_net="\"net_downsample_2x_full_y\"" 28 | agent_name=$agent_type"_"$1"_FULL_Y" 29 | 30 | if [ -z "$2" ]; then 31 | network_file="$agent_name.t7" 32 | gif_file="../sessions/$ENV.gif" 33 | csv_file="../sessions/$ENV.csv" 34 | else 35 | network_file="${agent_name}_$2.t7" 36 | gif_file="../sessions/${ENV}_$2.gif" 37 | csv_file="../sessions/${ENV}_$2.csv" 38 | fi 39 | state_dim=7056 40 | ncols=1 41 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1" 42 | gpu=0 43 | random_starts=30 44 | pool_frms="type="$pool_frms_type",size="$pool_frms_size 45 | num_threads=4 46 | 47 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -env $ENV 
-env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -gif_file $gif_file -csv_file $csv_file" 48 | echo $args 49 | 50 | cd dqn 51 | ../torch/bin/qlua test_agent.lua $args 52 | -------------------------------------------------------------------------------- /test_gpu2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then echo "Please provide the name of the game, e.g. ./run_gpu breakout [] []"; exit 0 5 | fi 6 | ENV=$1 7 | FRAMEWORK="alewrap" 8 | 9 | game_path=$PWD"/roms/" 10 | env_params="useRGB=true" 11 | agent="NeuralQLearner" 12 | n_replay=1 13 | netfile="\"convnet_atari3\"" 14 | update_freq=4 15 | actrep=4 16 | discount=0.99 17 | seed=3 18 | if [ -z "$3" ]; then 19 | seed=1 20 | else 21 | seed=$3 22 | 23 | fi 24 | learn_start=0 25 | pool_frms_type="\"max\"" 26 | pool_frms_size=2 27 | initial_priority="false" 28 | replay_memory=1000000 29 | eps_end=0.1 30 | eps_endt=replay_memory 31 | lr=0.00025 32 | agent_type="DQN3_0_1" 33 | preproc_net="\"net_downsample_2x_full_y\"" 34 | agent_name=$agent_type"_"$1"_FULL_Y_A" 35 | agent_nameB=$agent_type"_"$1"_FULL_Y_B" 36 | datas_file="$ENV.csv" 37 | if [ -z "$2" ]; then 38 | network_file="$agent_name.t7" 39 | network_fileB="$agent_nameB.t7" 40 | gif_file="../sessions/$ENV.gif" 41 | csv_file="../sessions/$ENV.csv" 42 | version=0 43 | else 44 | network_file="${agent_name}_$2.t7" 45 | network_fileB="${agent_nameB}_$2.t7" 46 | gif_file="../sessions/${ENV}_$2.gif" 47 | csv_file="../sessions/${ENV}_$2.csv" 48 | version=$2 49 | fi 50 | state_dim=7056 51 | ncols=1 52 | 
agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1" 53 | gpu=0 54 | random_starts=30 55 | pool_frms="type="$pool_frms_type",size="$pool_frms_size 56 | num_threads=4 57 | cd dqn 58 | args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -networkB $network_fileB -gif_file $gif_file -csv_file $csv_file -version $version -datas_file $datas_file" 59 | ../torch/bin/qlua test_2agent.lua $args 60 | 61 | -------------------------------------------------------------------------------- /test_gpu2_seeds: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ] 4 | then echo "Please provide the name of the game, e.g. 
./test_gpu2_seeds breakout [version] [maxseed]"; exit 1
5 | fi
6 |
7 | ENV=$1
8 | FRAMEWORK="alewrap"
9 |
10 | game_path=$PWD"/roms/"
11 | env_params="useRGB=true"
12 | agent="NeuralQLearner"
13 | n_replay=1
14 | netfile="\"convnet_atari3\""
15 | update_freq=4
16 | actrep=4
17 | discount=0.99
18 | learn_start=0
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=$replay_memory  # previously the literal word "replay_memory" (missing $)
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y_A"
29 | agent_nameB=$agent_type"_"$1"_FULL_Y_B"
30 | datas_file="$ENV.csv"
31 | if [ -z "$2" ]; then
32 |     network_file="$agent_name.t7"
33 |     network_fileB="$agent_nameB.t7"
34 |     gif_file="../sessions/$ENV.gif"
35 |     csv_file="../sessions/$ENV.csv"
36 |     version=0
37 | else
38 |     network_file="${agent_name}_$2.t7"
39 |     network_fileB="${agent_nameB}_$2.t7"
40 |     gif_file="../sessions/${ENV}_$2.gif"
41 |     csv_file="../sessions/${ENV}_$2.csv"
42 |     version=$2
43 | fi
44 | state_dim=7056
45 | ncols=1
46 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
47 | gpu=0
48 | random_starts=30
49 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
50 | num_threads=4
51 | # Number of seeds to run: $3 when given, otherwise 10.
52 | if [ -z "$3" ]; then
53 |     maxseed=10
54 | else
55 |     maxseed=$3
56 | fi
57 | # Abort if the dqn directory is missing rather than resolving ../torch/bin/qlua from the wrong place.
58 | cd dqn || exit 1
59 | for seed in $(seq "$maxseed")
60 | do
61 |     echo $seed
62 |     args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed
$seed -threads $num_threads -verbose 1 -network $network_file -networkB $network_fileB -gif_file $gif_file -csv_file $csv_file -version $version -datas_file $datas_file"
63 |     ../torch/bin/qlua test_2agent.lua $args
64 | done
65 |
66 |
--------------------------------------------------------------------------------
/test_gpu2_versions:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Runs test_2agent.lua for every network version 1..maxversion and every seed 1..maxseed. Usage: ./test_gpu2_versions <game> [maxversion] [maxseed]
3 | if [ -z "$1" ]
4 | then echo "Please provide the name of the game, e.g. ./test_gpu2_versions breakout [maxversion] [maxseed]"; exit 1
5 | fi
6 |
7 | ENV=$1
8 | FRAMEWORK="alewrap"
9 |
10 | game_path=$PWD"/roms/"
11 | env_params="useRGB=true"
12 | agent="NeuralQLearner"
13 | n_replay=1
14 | netfile="\"convnet_atari3\""
15 | update_freq=4
16 | actrep=4
17 | discount=0.99
18 | learn_start=0
19 | pool_frms_type="\"max\""
20 | pool_frms_size=2
21 | initial_priority="false"
22 | replay_memory=1000000
23 | eps_end=0.1
24 | eps_endt=$replay_memory  # previously the literal word "replay_memory" (missing $)
25 | lr=0.00025
26 | agent_type="DQN3_0_1"
27 | preproc_net="\"net_downsample_2x_full_y\""
28 | agent_name=$agent_type"_"$1"_FULL_Y_A"
29 | agent_nameB=$agent_type"_"$1"_FULL_Y_B"
30 | datas_file="$ENV.csv"
31 | state_dim=7056
32 | ncols=1
33 | agent_params="lr="$lr",ep=1,ep_end="$eps_end",ep_endt="$eps_endt",discount="$discount",hist_len=4,learn_start="$learn_start",replay_memory="$replay_memory",update_freq="$update_freq",n_replay="$n_replay",network="$netfile",preproc="$preproc_net",state_dim="$state_dim",minibatch_size=32,rescale_r=1,ncols="$ncols",bufferSize=512,valid_size=500,target_q=10000,clip_delta=1,min_reward=-1,max_reward=1"
34 | gpu=0
35 | random_starts=30
36 | pool_frms="type="$pool_frms_type",size="$pool_frms_size
37 | num_threads=4
38 | # Highest version to evaluate: $2 when given, otherwise 49.
39 | if [ -z "$2" ]; then
40 |     maxversion=49
41 | else
42 |     maxversion=$2
43 | fi
44 | # Number of seeds per version: $3 when given, otherwise 10.
45 | if [ -z "$3" ]; then
46 |     maxseed=10
47 | else
48 |     maxseed=$3
49 | fi
50 | # Abort if the dqn directory is missing rather than resolving ../torch/bin/qlua from the wrong place.
51 | cd dqn || exit 1
52 | for version in $(seq "$maxversion")
53 | do
54 |     network_file="${agent_name}_$version.t7"
55 |     network_fileB="${agent_nameB}_$version.t7"
56 |     gif_file="../sessions/${ENV}_$version.gif"
57 |     csv_file="../sessions/${ENV}_$version.csv"
58 |
59 |     for seed in $(seq "$maxseed")
60 |     do
61 |         args="-framework $FRAMEWORK -game_path $game_path -name $agent_name -nameB $agent_nameB -env $ENV -env_params $env_params -agent $agent -agent_params $agent_params -actrep $actrep -gpu $gpu -random_starts $random_starts -pool_frms $pool_frms -seed $seed -threads $num_threads -verbose 1 -network $network_file -networkB $network_fileB -gif_file $gif_file -csv_file $csv_file -version $version -datas_file $datas_file"
62 |         ../torch/bin/qlua test_2agent.lua $args
63 |     done
64 | done
65 |
66 |
--------------------------------------------------------------------------------
/test_schemes:
--------------------------------------------------------------------------------
1 | ./test_gpu2_versions Pong2PlayerVS 49
2 | ./test_gpu2_seeds Pong2Player075p 49
3 | ./test_gpu2_seeds Pong2Player05p 49
4 | ./test_gpu2_seeds Pong2Player025p 49
5 | ./test_gpu2_seeds Pong2Player0 49
6 | ./test_gpu2_seeds Pong2Player05 49
7 | ./test_gpu2_seeds Pong2Player025 49
8 | ./test_gpu2_seeds Pong2Player075 49
9 | ./test_gpu2_versions Pong2Player 49
10 |
--------------------------------------------------------------------------------