├── .gitignore ├── results ├── .DS_Store ├── try1 │ ├── Hopper.png │ ├── Reacher.png │ ├── Swimmer.png │ ├── HalfCheetah.png │ ├── plots_step_reward_multi.py │ ├── plots_threads_iterspeed.py │ ├── plots_threads_return_time.py │ ├── plots_step_reward.py │ ├── plots.py │ ├── Hopper-v1-5 │ ├── Swimmer-v1-1 │ ├── Reacher-v1-1 │ └── Reacher-v1-5 ├── montecarlo-test │ ├── 5000 │ ├── 10000 │ ├── 20000 │ ├── .DS_Store │ ├── all.png │ ├── new_vs_old.png │ ├── steps-reward.png │ ├── time-reward.png │ ├── updates-reward.png │ ├── steps-reward-long.png │ ├── plots_mc_episodes.py │ ├── plots_mc_stepcount.py │ ├── plots_mc_walltime.py │ ├── plots_mc_kl.py │ ├── 10xKL │ └── 1000-short ├── speedup │ ├── threads_iterspeed.png │ ├── threads_return_time.png │ ├── README.md │ ├── Reacher-v1-1 │ ├── Reacher-v1-4 │ ├── Reacher-v1-7 │ └── Reacher-v1-8 ├── vs_fixed │ ├── reacher │ │ ├── compare.png │ │ ├── fixed_kl.png │ │ ├── fixed_steps.png │ │ ├── compare-nomargin.png │ │ ├── fixed_kl.py │ │ ├── fixed_steps.py │ │ ├── compare-nomargin.py │ │ └── compare.py │ ├── reacher2 │ │ ├── compare.png │ │ ├── compare-nomargin.png │ │ ├── params.py │ │ └── compare.py │ ├── swimmer │ │ ├── compare.png │ │ ├── fixed_kl.png │ │ ├── fixed_steps.png │ │ ├── compare-nomargin.png │ │ ├── fixed_kl.py │ │ ├── fixed_steps.py │ │ ├── compare-nomargin.py │ │ ├── params.py │ │ └── compare.py │ ├── swimmer2 │ │ ├── compare.png │ │ ├── compare-nomargin.png │ │ └── compare.py │ ├── halfcheetah │ │ ├── compare.png │ │ ├── fixed_kl.png │ │ ├── fixed_steps.png │ │ ├── compare-nomargin.png │ │ ├── fixed_kl.py │ │ ├── fixed_steps.py │ │ ├── compare-nomargin.py │ │ └── compare.py │ └── halfcheetah2 │ │ ├── compare.png │ │ ├── compare-nomargin.png │ │ └── compare.py ├── Adaptive-vs-Direct │ ├── reacher_linear_kl.jpg │ ├── reacher_linear_steps.jpg │ ├── status.md │ ├── linear_kl.py │ └── linear_steps.py ├── new_vs_old │ └── plots_new_v_old.py └── try2 │ └── Hopper-v1-1 ├── texput.log ├── README.md ├── trials.txt ├── trials_old.txt ├── value_function.py ├── main.py ├── rollouts.py ├── utils.py └── model.py /.gitignore: -------------------------------------------------------------------------------- 1 | MUJUCO_LOG.txt 2 | *.pyc 3 | paper/ 4 | -------------------------------------------------------------------------------- /results/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/.DS_Store -------------------------------------------------------------------------------- /results/try1/Hopper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/try1/Hopper.png -------------------------------------------------------------------------------- /results/try1/Reacher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/try1/Reacher.png -------------------------------------------------------------------------------- /results/try1/Swimmer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/try1/Swimmer.png -------------------------------------------------------------------------------- /results/try1/HalfCheetah.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/try1/HalfCheetah.png -------------------------------------------------------------------------------- /results/montecarlo-test/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/montecarlo-test/.DS_Store -------------------------------------------------------------------------------- /results/montecarlo-test/all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/montecarlo-test/all.png -------------------------------------------------------------------------------- /results/montecarlo-test/new_vs_old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/montecarlo-test/new_vs_old.png -------------------------------------------------------------------------------- /results/speedup/threads_iterspeed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/speedup/threads_iterspeed.png -------------------------------------------------------------------------------- /results/vs_fixed/reacher/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/reacher/compare.png -------------------------------------------------------------------------------- /results/vs_fixed/reacher/fixed_kl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/reacher/fixed_kl.png -------------------------------------------------------------------------------- /results/vs_fixed/reacher2/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/reacher2/compare.png -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/swimmer/compare.png -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/fixed_kl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/swimmer/fixed_kl.png -------------------------------------------------------------------------------- /results/vs_fixed/swimmer2/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/swimmer2/compare.png -------------------------------------------------------------------------------- /results/montecarlo-test/steps-reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/montecarlo-test/steps-reward.png -------------------------------------------------------------------------------- /results/montecarlo-test/time-reward.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/montecarlo-test/time-reward.png -------------------------------------------------------------------------------- /results/speedup/threads_return_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/speedup/threads_return_time.png -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/halfcheetah/compare.png -------------------------------------------------------------------------------- /results/vs_fixed/reacher/fixed_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/reacher/fixed_steps.png -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/fixed_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/swimmer/fixed_steps.png -------------------------------------------------------------------------------- /results/montecarlo-test/updates-reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/montecarlo-test/updates-reward.png -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah/fixed_kl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/halfcheetah/fixed_kl.png -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah2/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/halfcheetah2/compare.png -------------------------------------------------------------------------------- /results/montecarlo-test/steps-reward-long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/montecarlo-test/steps-reward-long.png -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah/fixed_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/halfcheetah/fixed_steps.png -------------------------------------------------------------------------------- /results/vs_fixed/reacher/compare-nomargin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/reacher/compare-nomargin.png -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/compare-nomargin.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/swimmer/compare-nomargin.png -------------------------------------------------------------------------------- /results/Adaptive-vs-Direct/reacher_linear_kl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/Adaptive-vs-Direct/reacher_linear_kl.jpg -------------------------------------------------------------------------------- /results/vs_fixed/reacher2/compare-nomargin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/reacher2/compare-nomargin.png -------------------------------------------------------------------------------- /results/vs_fixed/swimmer2/compare-nomargin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/swimmer2/compare-nomargin.png -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah/compare-nomargin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/halfcheetah/compare-nomargin.png -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah2/compare-nomargin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/vs_fixed/halfcheetah2/compare-nomargin.png -------------------------------------------------------------------------------- /results/Adaptive-vs-Direct/reacher_linear_steps.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kvfrans/parallel-trpo/HEAD/results/Adaptive-vs-Direct/reacher_linear_steps.jpg -------------------------------------------------------------------------------- /results/speedup/README.md: -------------------------------------------------------------------------------- 1 | # parallel-trpo 2 | 3 | A parallel implementation of TRPO. 4 | 5 | I'm working towards the ideas at [this openAI research request](https://openai.com/requests-for-research/#parallel-trpo). Code is working, and I'm in the middle of collecting data and writing a paper. 
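For context on the speedup figure below, the rollout phase parallelizes roughly like this: each worker process owns its own Gym environment and a copy of the current policy parameters, returns whole trajectories, and the learner concatenates them until the timestep budget for one TRPO update is filled. The snippet is a minimal, self-contained sketch of that pattern only; it is not the repository's rollouts.py, the toy environment and the `rollout`/`gather_paths` names are placeholders, and it targets Python 3 unlike the Python 2 scripts in this dump.

```python
import numpy as np
from multiprocessing import Pool

def rollout(args):
    # Worker: run one episode with a stand-in linear policy in a stand-in environment.
    seed, policy_params = args
    rng = np.random.RandomState(seed)
    state = rng.randn(4)                          # placeholder for env.reset()
    observations, rewards = [], []
    for _ in range(50):                           # placeholder 50-step episode
        action = np.tanh(policy_params.dot(state))
        state = state + 0.1 * rng.randn(4)        # placeholder for env.step(action)
        observations.append(state)
        rewards.append(-float(np.square(action).sum()))
    return {"obs": np.array(observations), "rewards": np.array(rewards)}

def gather_paths(pool, policy_params, timesteps_per_batch, num_workers):
    # Learner: keep dispatching episodes to the pool until the timestep budget is filled.
    paths, steps = [], 0
    while steps < timesteps_per_batch:
        jobs = [(np.random.randint(1 << 30), policy_params) for _ in range(num_workers)]
        for path in pool.map(rollout, jobs):
            paths.append(path)
            steps += len(path["rewards"])
    return paths

if __name__ == "__main__":
    pool = Pool(processes=4)
    params = np.zeros((2, 4))                     # fake policy: 2 actions from 4 observations
    batch = gather_paths(pool, params, timesteps_per_batch=1000, num_workers=4)
    print(len(batch), "episodes,", sum(len(p["rewards"]) for p in batch), "timesteps")
    pool.close()
```

Only the sampling half is parallel in this layout; the conjugate-gradient policy update still runs in a single process, which is why the per-iteration gain (compare rollout_time against learn_time in plots_threads_iterspeed.py below) saturates well below the worker count.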
6 | 7 | Currently achieves about a 2x speedup with 4 worker processes on a 4-core MacBook Pro. -------------------------------------------------------------------------------- /results/try1/plots_step_reward_multi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "Reacher-v1" 7 | 8 | steps = [] 9 | rewards = [] 10 | 11 | for i in xrange(8): 12 | with open("speedup/"+task+"-"+str(i+1)) as data_file: 13 | data = json.load(data_file) 14 | 15 | steps.append([]) 16 | rewards.append([]) 17 | for j in xrange(len(data["mean_reward"])): 18 | rewards[i].append(data["mean_reward"][j]) 19 | steps[i].append(j) 20 | 21 | plt.plot(np.array(steps[i]),np.array(rewards[i]),label="threads: "+str(i+1)) 22 | 23 | plt.xlabel("Steps") 24 | plt.ylabel("Reward") 25 | plt.legend(loc=4) 26 | plt.title(task) 27 | plt.show() 28 | -------------------------------------------------------------------------------- /results/try1/plots_threads_iterspeed.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "Reacher-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | totaltime = [] 11 | t = [] 12 | l = [] 13 | r = [] 14 | for i in xrange(8): 15 | with open("speedup/"+task+"-"+str(i+1)) as data_file: 16 | data = json.load(data_file) 17 | 18 | 19 | t.append(np.mean(data["rollout_time"])) 20 | l.append(np.mean(data["learn_time"])) 21 | r.append(i+1) 22 | 23 | print r 24 | print t 25 | plt.plot(r,t,label="Rollout time") 26 | plt.plot(r,l,label="Learning time") 27 | plt.xlabel("Threads") 28 | plt.ylabel("Time for an iteration (seconds)") 29 | plt.legend(loc=1) 30 | plt.title(task) 31 | plt.show() 32 | -------------------------------------------------------------------------------- /texput.log: -------------------------------------------------------------------------------- 1 | This is pdfTeX, Version 3.14159265-2.6-1.40.17 (TeX Live 2016) (preloaded format=pdflatex 2016.5.22) 24 SEP 2016 21:29 2 | entering extended mode 3 | restricted \write18 enabled. 4 | %&-line parsing enabled. 5 | **trpo 6 | 7 | ! Emergency stop. 8 | <*> trpo 9 | 10 | End of file on the terminal! 11 | 12 | 13 | Here is how much of TeX's memory you used: 14 | 3 strings out of 493014 15 | 99 string characters out of 6133351 16 | 53601 words of memory out of 5000000 17 | 3648 multiletter control sequences out of 15000+600000 18 | 3640 words of font info for 14 fonts, out of 8000000 for 9000 19 | 1141 hyphenation exceptions out of 8191 20 | 0i,0n,0p,7b,6s stack positions out of 5000i,500n,10000p,200000b,80000s 21 | ! ==> Fatal error occurred, no output PDF file produced! 22 | -------------------------------------------------------------------------------- /results/Adaptive-vs-Direct/status.md: -------------------------------------------------------------------------------- 1 | The idea is to show that an adaptive method would perform well with one set of hyperparameters, compared to linear/exponential decay.
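To make the comparison concrete, the three schedule families could update the two hyperparameters once per policy iteration roughly as sketched below. This is only an illustration of the terms used in this file: the actual update rule lives in main.py (not included in this dump), and the `decay_schedules` name and the `reward_improved` gate on the adaptive branch are assumptions, not the repository's code. For the linear schedules, kl_adapt and timestep_adapt act as per-iteration offsets; for the exponential schedules they act as multiplicative factors.

```python
def decay_schedules(max_kl, timesteps_per_batch, kl_adapt, timestep_adapt,
                    decay_method, reward_improved):
    """Illustrative per-iteration hyperparameter updates (not the repo's exact rule)."""
    if decay_method == "linear":
        max_kl -= kl_adapt                        # e.g. shrink the KL bound by 0.00001
        timesteps_per_batch += timestep_adapt     # e.g. grow the batch by 100 steps
    elif decay_method == "exponential":
        max_kl *= kl_adapt                        # e.g. multiply the KL bound by 0.99
        timesteps_per_batch *= timestep_adapt     # e.g. multiply the batch by 1.01
    elif decay_method == "adaptive":
        if reward_improved:                       # only move when the average return improved
            max_kl -= kl_adapt
            timesteps_per_batch += timestep_adapt
    return max_kl, timesteps_per_batch
```

The adaptive-margin variant referred to elsewhere in this repository additionally requires the improvement to clear a margin (trials.txt suggests around 10%) before either value moves.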
2 | 3 | Results to find: 4 | 5 | For each environment: HalfCheetah, Swimmer, Hopper, Reacher 6 | 7 | KL: 8 | Linear: 9 | decrease KL by 0.000001, 0.00001, 0.0001 10 | Exponential: 11 | decrease KL by 0.9, 0.99, 0.999 12 | Adaptive: 13 | adapt KL by 0.000001, 0.00001, 0.0001 14 | 15 | Stepcount: 16 | Linear: 17 | increase stepcount by 20, 100, 500, 1000 18 | Exponential: 19 | increase linear by 1.001, 1.01, 1.1 20 | Adaptive: 21 | adapt stepcount by 20, 100, 500, 1000 22 | 23 | First only change the KL, then only change the stepcount. 24 | 25 | We will make one final comparison between [only changing KL, only changing stepcount, and changing both] to show the adaptive method works when changing both hyperparameters -------------------------------------------------------------------------------- /results/montecarlo-test/plots_mc_episodes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "Reacher-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | totaltime = [] 11 | t = [] 12 | r = [] 13 | 14 | trials = [1000,5000,10000,20000] 15 | for i in xrange(4): 16 | with open("montecarlo-test/"+str(trials[i])) as data_file: 17 | data = json.load(data_file) 18 | 19 | times.append([]) 20 | rewards.append([]) 21 | for e in xrange(len(data["mean_reward"])): 22 | times[i].append(e) 23 | rewards[i].append(data["mean_reward"][e]) 24 | 25 | t.append(np.array(times[i])) 26 | r.append(np.array(rewards[i])) 27 | 28 | plt.plot(t[i],r[i],color=(1 - (i/8.0),i/8.0,1.0),label=("%d steps / update" % (trials[i]))) 29 | plt.xlabel("Policy updates (iterations)") 30 | plt.ylabel("Average return") 31 | plt.legend(loc=4) 32 | plt.title(task) 33 | plt.show() 34 | -------------------------------------------------------------------------------- /results/montecarlo-test/plots_mc_stepcount.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "Reacher-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | totaltime = [] 11 | t = [] 12 | r = [] 13 | 14 | trials = [1000,5000,10000,20000] 15 | for i in xrange(4): 16 | with open("montecarlo-test/"+str(trials[i])) as data_file: 17 | data = json.load(data_file) 18 | 19 | times.append([]) 20 | rewards.append([]) 21 | for e in xrange(len(data["mean_reward"])): 22 | times[i].append(e*trials[i]) 23 | rewards[i].append(data["mean_reward"][e]) 24 | 25 | t.append(np.array(times[i])) 26 | r.append(np.array(rewards[i])) 27 | 28 | plt.plot(t[i],r[i],color=(1 - (i/8.0),i/8.0,1.0),label=("%d steps / update" % (trials[i]))) 29 | plt.xlabel("Environment Steps Seen") 30 | plt.ylabel("Average return") 31 | plt.legend(loc=4) 32 | plt.title(task) 33 | plt.show() 34 | -------------------------------------------------------------------------------- /results/try1/plots_threads_return_time.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "Reacher-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | totaltime = [] 11 | t = [] 12 | r = [] 13 | for i in xrange(8): 14 | with open("speedup/"+task+"-"+str(i+1)) as data_file: 15 | data = json.load(data_file) 16 | 17 | times.append([]) 18 | rewards.append([]) 19 | totaltime.append(0) 20 | for e in xrange(len(data["mean_reward"])): 21 | totaltime[i] += data["rollout_time"][e] + 
data["learn_time"][e] 22 | times[i].append(totaltime[i]) 23 | rewards[i].append(data["mean_reward"][e]) 24 | 25 | t.append(np.array(times[i])) 26 | r.append(np.array(rewards[i])) 27 | 28 | plt.plot(t[i],r[i],color=(1 - (i/8.0),i/8.0,1.0),label=("%d threads" % (i+1))) 29 | plt.xlabel("Training time (minutes)") 30 | plt.ylabel("Average return") 31 | plt.legend(loc=4) 32 | plt.title(task) 33 | plt.show() 34 | -------------------------------------------------------------------------------- /results/montecarlo-test/plots_mc_walltime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "Reacher-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | totaltime = [] 11 | t = [] 12 | r = [] 13 | 14 | trials = [1000,5000,10000,20000] 15 | for i in xrange(4): 16 | with open("montecarlo-test/"+str(trials[i])) as data_file: 17 | data = json.load(data_file) 18 | 19 | times.append([]) 20 | rewards.append([]) 21 | totaltime.append(0) 22 | for e in xrange(len(data["mean_reward"])): 23 | totaltime[i] += data["rollout_time"][e] + data["learn_time"][e] 24 | times[i].append(totaltime[i]) 25 | rewards[i].append(data["mean_reward"][e]) 26 | 27 | t.append(np.array(times[i])) 28 | r.append(np.array(rewards[i])) 29 | 30 | plt.plot(t[i],r[i],color=(1 - (i/8.0),i/8.0,1.0),label=("%d steps / update" % (trials[i]))) 31 | plt.xlabel("Time (mins)") 32 | plt.ylabel("Average return") 33 | plt.legend(loc=4) 34 | plt.title(task) 35 | plt.show() 36 | -------------------------------------------------------------------------------- /results/montecarlo-test/plots_mc_kl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "Reacher-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | totaltime = [] 11 | t = [] 12 | r = [] 13 | 14 | trials = ["10xKL","20000"] 15 | for i in xrange(2): 16 | with open("montecarlo-test/"+trials[i]) as data_file: 17 | data = json.load(data_file) 18 | 19 | times.append([]) 20 | rewards.append([]) 21 | for e in xrange(len(data["mean_reward"])): 22 | if i == 0: 23 | times[i].append(e) 24 | else: 25 | times[i].append(e) 26 | rewards[i].append(data["mean_reward"][e]) 27 | 28 | t.append(np.array(times[i])) 29 | r.append(np.array(rewards[i])) 30 | 31 | if i == 0: 32 | plt.plot(t[i],r[i],color=(1 - (i/2.0),i/2.0,1.0),label="0.01 max KL") 33 | else: 34 | plt.plot(t[i],r[i],color=(1 - (i/2.0),i/2.0,1.0),label="0.001 max KL") 35 | plt.xlabel("Environment Steps Seen") 36 | plt.ylabel("Average return") 37 | plt.legend(loc=4) 38 | plt.title(task) 39 | plt.show() 40 | -------------------------------------------------------------------------------- /results/try1/plots_step_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | # task = "Reacher-v1" 7 | task = sys.argv[1] 8 | 9 | with open("try1/"+task+"-1") as data_file: 10 | data_1 = json.load(data_file) 11 | 12 | with open("try1/"+task+"-5") as data_file: 13 | data_2 = json.load(data_file) 14 | 15 | times_1 = [] 16 | rewards_1 = [] 17 | for i in xrange(len(data_1["mean_reward"])): 18 | times_1.append(i) 19 | rewards_1.append(data_1["mean_reward"][i]) 20 | 21 | times_2 = [] 22 | rewards_2 = [] 23 | for i in xrange(len(data_2["mean_reward"])): 24 | times_2.append(i) 25 | 
rewards_2.append(data_2["mean_reward"][i]) 26 | 27 | t1 = np.array(times_1) 28 | r1 = np.array(rewards_1) 29 | t2 = np.array(times_2) 30 | r2 = np.array(rewards_2) 31 | 32 | # t1 = np.arange(len(times_1)) 33 | # t2 = np.arange(len(times_2)) 34 | 35 | plt.plot(t1,r1,"r",label="single thread") 36 | plt.plot(t2,r2,"b",label="5 threads") 37 | plt.xlabel("Training time (minutes)") 38 | plt.ylabel("Average return") 39 | plt.legend(loc=4) 40 | plt.title(task) 41 | plt.show() 42 | -------------------------------------------------------------------------------- /results/Adaptive-vs-Direct/linear_kl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "HalfCheetah-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | t = [] 11 | r = [] 12 | 13 | trials = ["Reacher-v1-linear-0.000001-0.000000","Reacher-v1-linear-0.000010-0.000000","Reacher-v1-linear-0.000100-0.000000"] 14 | for i in xrange(len(trials)): 15 | with open(trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | for e in xrange(len(data["mean_reward"])): 22 | totaltime += data["timesteps"][e] 23 | # totaltime += 1 24 | if i == 0: 25 | times[i].append(totaltime) 26 | else: 27 | times[i].append(totaltime) 28 | rewards[i].append(data["mean_reward"][e]) 29 | 30 | t.append(np.array(times[i])) 31 | r.append(np.array(rewards[i])) 32 | 33 | plt.plot(t[i],r[i],color=(1 - (i/4.0),i/4.0,1.0),label=trials[i]) 34 | 35 | plt.xlabel("Environment Steps Seen") 36 | plt.ylabel("Average return") 37 | plt.legend(loc=4) 38 | plt.title(task) 39 | plt.show() 40 | -------------------------------------------------------------------------------- /results/Adaptive-vs-Direct/linear_steps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "HalfCheetah-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | t = [] 11 | r = [] 12 | 13 | trials = ["Reacher-v1-linear-0.000000-20.000000","Reacher-v1-linear-0.000000-100.000000","Reacher-v1-linear-0.000000-500.000000","Reacher-v1-adaptive-0.000000-500.000000"] 14 | for i in xrange(len(trials)): 15 | with open(trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | for e in xrange(len(data["mean_reward"])): 22 | totaltime += data["timesteps"][e] 23 | # totaltime += 1 24 | if i == 0: 25 | times[i].append(totaltime) 26 | else: 27 | times[i].append(totaltime) 28 | rewards[i].append(data["mean_reward"][e]) 29 | 30 | t.append(np.array(times[i])) 31 | r.append(np.array(rewards[i])) 32 | 33 | plt.plot(t[i],r[i],color=(1 - (i/5.0),i/5.0,1.0),label=trials[i]) 34 | 35 | plt.xlabel("Environment Steps Seen") 36 | plt.ylabel("Average return") 37 | plt.legend(loc=4) 38 | plt.title(task) 39 | plt.show() 40 | -------------------------------------------------------------------------------- /results/try1/plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | # task = "Reacher-v1" 7 | task = sys.argv[1] 8 | 9 | with open("done/"+task+"-1") as data_file: 10 | data_1 = json.load(data_file) 11 | 12 | with open("done/"+task+"-5") as data_file: 13 | data_2 = json.load(data_file) 14 | 15 | times_1 = [] 16 | rewards_1 
= [] 17 | totaltime_1 = 0 18 | for i in xrange(len(data_1["mean_reward"])): 19 | totaltime_1 += data_1["rollout_time"][i] + data_1["learn_time"][i] 20 | times_1.append(totaltime_1) 21 | rewards_1.append(data_1["mean_reward"][i]) 22 | 23 | times_2 = [] 24 | rewards_2 = [] 25 | totaltime_2 = 0 26 | for i in xrange(len(data_2["mean_reward"])): 27 | totaltime_2 += data_2["rollout_time"][i] + data_2["learn_time"][i] 28 | times_2.append(totaltime_2) 29 | rewards_2.append(data_2["mean_reward"][i]) 30 | 31 | t1 = np.array(times_1) 32 | r1 = np.array(rewards_1) 33 | t2 = np.array(times_2) 34 | r2 = np.array(rewards_2) 35 | 36 | # t1 = np.arange(len(times_1)) 37 | # t2 = np.arange(len(times_2)) 38 | 39 | plt.plot(t1,r1,"r",label="single thread") 40 | plt.plot(t2,r2,"b",label="5 threads") 41 | plt.xlabel("Training time (minutes)") 42 | plt.ylabel("Average return") 43 | plt.legend(loc=4) 44 | plt.title(task) 45 | plt.show() 46 | -------------------------------------------------------------------------------- /results/new_vs_old/plots_new_v_old.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | task = "HalfCheetah-v1" 7 | 8 | times = [] 9 | rewards = [] 10 | t = [] 11 | r = [] 12 | 13 | trials = ["HalfCheetah-newmethod","HalfCheetah-oldmethod","HalfCheetah-averagingLONG"] 14 | for i in xrange(3): 15 | with open("new_vs_old/"+trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | for e in xrange(len(data["mean_reward"])): 22 | totaltime += data["timesteps"][e] 23 | # totaltime += 1 24 | if i == 0: 25 | times[i].append(totaltime) 26 | else: 27 | times[i].append(totaltime) 28 | rewards[i].append(data["mean_reward"][e]) 29 | 30 | t.append(np.array(times[i])) 31 | r.append(np.array(rewards[i])) 32 | 33 | if i == 0: 34 | plt.plot(t[i],r[i],color=(1 - (i/3.0),i/3.0,1.0),label="10,000 + Dynamic KL Method") 35 | elif i == 1: 36 | plt.plot(t[i],r[i],color=(1 - (i/3.0),i/3.0,1.0),label="20,000 Method (regular TRPO)") 37 | else: 38 | plt.plot(t[i],r[i],color=(1 - (i/3.0),i/3.0,1.0),label="Dynamic Steps + Dynamic KL Method") 39 | plt.xlabel("Environment Steps Seen") 40 | plt.ylabel("Average return") 41 | plt.legend(loc=4) 42 | plt.title(task) 43 | plt.show() 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # parallel-trpo 2 | 3 | A parallel implementation of Trust Region Policy Optimization on environments from OpenAI Gym. 4 | 5 | Now includes hyperparameter adaptation as well! For more info, check [my post on this project](http://kvfrans.com/speeding-up-trpo-through-parallelization-and-parameter-adaptation/). 6 | 7 | I'm working towards the ideas at [this OpenAI research request](https://openai.com/requests-for-research/#parallel-trpo). 8 | The code is based on [this implementation](https://github.com/ilyasu123/trpo). 9 | 10 | I'm currently working together with [Danijar](https://github.com/danijar) on writing an updated version of [this preliminary paper](http://kvfrans.com/static/trpo.pdf), describing the multiple actors setup. 11 | 12 | How to run: 13 | ``` 14 | # This just runs a simple training on Reacher-v1.
15 | python main.py 16 | 17 | # For the commands used to recreate results, check trials.txt 18 | 19 | ``` 20 | Parameters: 21 | ``` 22 | --task: what gym environment to run on 23 | --timesteps_per_batch: how many timesteps for each policy iteration 24 | --n_iter: number of iterations 25 | --gamma: discount factor for future rewards 26 | --max_kl: maximum KL divergence between new and old policy 27 | --cg_damping: damping on the KL constraint (ratio of original gradient to use) 28 | --num_threads: how many async threads to use 29 | --monitor: whether to monitor progress for publishing results to gym or not 30 | ``` 31 | -------------------------------------------------------------------------------- /results/vs_fixed/reacher/fixed_kl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Reacher-v1-none-20000.000000-0.001000-0.000000-0.000000","Reacher-v1-none-20000.000000-0.005000-0.000000-0.000000","Reacher-v1-none-20000.000000-0.010000-0.000000-0.000000"] 12 | names = ["Fixed 0.001 KL", "Fixed 0.005 KL", "Fixed 0.01 KL"] 13 | 14 | for i in xrange(len(trials)): 15 | with open(trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | 22 | time_since = 0 23 | avg = 0 24 | avgcount = 0 25 | 26 | for e in xrange(len(data["mean_reward"])): 27 | totaltime += data["timesteps"][e] 28 | 29 | time_since += data["timesteps"][e] 30 | avg += data["mean_reward"][e] 31 | avgcount += 1 32 | 33 | if time_since > 10000: 34 | time_since = 0 35 | # totaltime += 1 36 | if i == 0: 37 | times[i].append(totaltime) 38 | else: 39 | times[i].append(totaltime) 40 | rewards[i].append(data["mean_reward"][e]) 41 | 42 | avg = 0 43 | avgcount = 0 44 | 45 | t.append(np.array(times[i])) 46 | r.append(np.array(rewards[i])) 47 | 48 | plt.plot(t[i],r[i],color=(1 - (i/5.0),i/5.0,1.0),label=names[i]) 49 | 50 | plt.xlabel("Environment Steps Seen") 51 | plt.ylabel("Average return") 52 | plt.legend(loc=4) 53 | plt.title("Reacher-v1") 54 | plt.show() 55 | -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/fixed_kl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Swimmer-v1-none-20000.000000-0.001000-0.000000-0.000000","Swimmer-v1-none-20000.000000-0.005000-0.000000-0.000000","Swimmer-v1-none-20000.000000-0.010000-0.000000-0.000000"] 12 | names = ["Fixed 0.001 KL", "Fixed 0.005 KL", "Fixed 0.01 KL"] 13 | 14 | for i in xrange(len(trials)): 15 | with open(trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | 22 | time_since = 0 23 | avg = 0 24 | avgcount = 0 25 | 26 | for e in xrange(len(data["mean_reward"])): 27 | totaltime += data["timesteps"][e] 28 | 29 | time_since += data["timesteps"][e] 30 | avg += data["mean_reward"][e] 31 | avgcount += 1 32 | 33 | if time_since > 10000: 34 | time_since = 0 35 | # totaltime += 1 36 | if i == 0: 37 | times[i].append(totaltime) 38 | else: 39 | times[i].append(totaltime) 40 | rewards[i].append(data["mean_reward"][e]) 41 | 42 | avg = 0 43 | avgcount = 0 44 | 45 | t.append(np.array(times[i])) 46 |
r.append(np.array(rewards[i])) 47 | 48 | plt.plot(t[i],r[i],color=(1 - (i/5.0),i/5.0,1.0),label=names[i]) 49 | 50 | plt.xlabel("Environment Steps Seen") 51 | plt.ylabel("Average return") 52 | plt.legend(loc=4) 53 | plt.title("Swimmer-v1") 54 | plt.show() 55 | -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah/fixed_kl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["HalfCheetah-v1-none-20000.000000-0.001000-0.000000-0.000000","HalfCheetah-v1-none-20000.000000-0.005000-0.000000-0.000000","HalfCheetah-v1-none-20000.000000-0.010000-0.000000-0.000000"] 12 | names = ["Fixed 0.001 KL", "Fixed 0.005 KL", "Fixed 0.01 KL"] 13 | 14 | for i in xrange(len(trials)): 15 | with open(trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | 22 | time_since = 0 23 | avg = 0 24 | avgcount = 0 25 | 26 | for e in xrange(len(data["mean_reward"])): 27 | totaltime += data["timesteps"][e] 28 | 29 | time_since += data["timesteps"][e] 30 | avg += data["mean_reward"][e] 31 | avgcount += 1 32 | 33 | if time_since > 10000: 34 | time_since = 0 35 | # totaltime += 1 36 | if i == 0: 37 | times[i].append(totaltime) 38 | else: 39 | times[i].append(totaltime) 40 | rewards[i].append(data["mean_reward"][e]) 41 | 42 | avg = 0 43 | avgcount = 0 44 | 45 | t.append(np.array(times[i])) 46 | r.append(np.array(rewards[i])) 47 | 48 | plt.plot(t[i],r[i],color=(1 - (i/5.0),i/5.0,1.0),label=names[i]) 49 | 50 | plt.xlabel("Environment Steps Seen") 51 | plt.ylabel("Average return") 52 | plt.legend(loc=4) 53 | plt.title("HalfCheetah-v1") 54 | plt.show() 55 | -------------------------------------------------------------------------------- /results/vs_fixed/reacher2/params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Reacher-v1-adaptive-margin-1000.000000-0.001000-300.000000-0.000500"] 12 | names = ["Adapt both w/ margin"] 13 | for i in xrange(len(trials)): 14 | with open(trials[i]) as data_file: 15 | data = json.load(data_file) 16 | 17 | times.append([]) 18 | rewards.append([]) 19 | totaltime = 0 20 | 21 | time_since = 0 22 | avg = 0 23 | avgcount = 0 24 | 25 | for e in xrange(len(data["mean_reward"])): 26 | totaltime += data["timesteps"][e] 27 | 28 | time_since += data["timesteps"][e] 29 | # import ipdb; ipdb.set_trace() 30 | avg += data["timesteps"][e] 31 | avgcount += 1 32 | 33 | if time_since > 10000 and totaltime < 4000000: 34 | time_since = 0 35 | # totaltime += 1 36 | if i == 0: 37 | times[i].append(totaltime) 38 | else: 39 | times[i].append(totaltime) 40 | rewards[i].append(avg/avgcount) 41 | 42 | avg = 0 43 | avgcount = 0 44 | 45 | t.append(np.array(times[i])) 46 | r.append(np.array(rewards[i])) 47 | 48 | lin, = plt.plot(t[i],r[i],label=names[i]) 49 | # if i == 0: 50 | # lin.remove() 51 | 52 | 53 | plt.xlabel("Environment Steps Seen") 54 | plt.ylabel("Average return") 55 | leg = plt.legend(loc=4) 56 | for legobj in leg.legendHandles: 57 | legobj.set_linewidth(2.0) 58 | plt.title("Reacher-v1") 59 | plt.show() 60 | -------------------------------------------------------------------------------- 
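The plotting scripts under results/vs_fixed (fixed_kl.py and params.py above, fixed_steps.py and the compare variants below) all repeat the same loop: load one JSON log per trial, accumulate data["timesteps"] into a cumulative step count, and emit one point roughly every 10,000 environment steps. A condensed Python 3 refactor of that shared pattern is sketched below; the `bucketed_curve`/`plot_trials` names are not from the repository, and it plots the bucket-averaged reward (as the compare scripts do) rather than the last reward in each bucket (as the fixed_* scripts do).

```python
import json
import numpy as np
import matplotlib.pyplot as plt

def bucketed_curve(filename, bucket=10000, max_steps=None):
    """Return (cumulative env steps, bucket-averaged mean reward) for one trial log."""
    with open(filename) as f:
        data = json.load(f)
    xs, ys = [], []
    total, since, acc, count = 0, 0, 0.0, 0
    for steps, reward in zip(data["timesteps"], data["mean_reward"]):
        total += steps
        since += steps
        acc += reward
        count += 1
        if since > bucket and (max_steps is None or total < max_steps):
            xs.append(total)
            ys.append(acc / count)
            since, acc, count = 0, 0.0, 0
    return np.array(xs), np.array(ys)

def plot_trials(trials, names, title, bucket=10000, max_steps=None):
    """Plot one bucketed learning curve per trial file, in the style of the compare scripts."""
    for filename, name in zip(trials, names):
        x, y = bucketed_curve(filename, bucket, max_steps)
        plt.plot(x, y, label=name)
    plt.xlabel("Environment Steps Seen")
    plt.ylabel("Average return")
    plt.legend(loc=4)
    plt.title(title)
    plt.show()
```

Folding the per-trial logic into one function also removes the duplicated `if i == 0` / `else` branches and the unused `avg`/`avgcount` accumulators that the fixed_* scripts carry.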
/results/vs_fixed/reacher/fixed_steps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Reacher-v1-none-20000.000000-0.001000-0.000000-0.000000","Reacher-v1-none-10000.000000-0.001000-0.000000-0.000000","Reacher-v1-none-5000.000000-0.001000-0.000000-0.000000","Reacher-v1-none-1500.000000-0.001000-0.000000-0.000000"] 12 | names = ["Fixed 20,000 steps", "Fixed 10,000 steps", "Fixed 5,000 steps", "Fixed 1,500 steps"] 13 | 14 | for i in xrange(len(trials)): 15 | with open(trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | 22 | time_since = 0 23 | avg = 0 24 | avgcount = 0 25 | 26 | for e in xrange(len(data["mean_reward"])): 27 | totaltime += data["timesteps"][e] 28 | 29 | time_since += data["timesteps"][e] 30 | avg += data["mean_reward"][e] 31 | avgcount += 1 32 | 33 | if time_since > 10000: 34 | time_since = 0 35 | # totaltime += 1 36 | if i == 0: 37 | times[i].append(totaltime) 38 | else: 39 | times[i].append(totaltime) 40 | rewards[i].append(data["mean_reward"][e]) 41 | 42 | avg = 0 43 | avgcount = 0 44 | 45 | t.append(np.array(times[i])) 46 | r.append(np.array(rewards[i])) 47 | 48 | plt.plot(t[i],r[i],color=(1 - (i/5.0),i/5.0,1.0),label=names[i]) 49 | 50 | plt.xlabel("Environment Steps Seen") 51 | plt.ylabel("Average return") 52 | plt.legend(loc=4) 53 | plt.title("Reacher-v1") 54 | plt.show() 55 | -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah/fixed_steps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["HalfCheetah-v1-none-20000.000000-0.001000-0.000000-0.000000","HalfCheetah-v1-none-10000.000000-0.001000-0.000000-0.000000","HalfCheetah-v1-none-5000.000000-0.001000-0.000000-0.000000","HalfCheetah-v1-none-1500.000000-0.001000-0.000000-0.000000"] 12 | names = ["Fixed 20,000 steps", "Fixed 10,000 steps", "Fixed 5,000 steps", "Fixed 1,500 steps"] 13 | 14 | for i in xrange(len(trials)): 15 | with open(trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | 22 | time_since = 0 23 | avg = 0 24 | avgcount = 0 25 | 26 | for e in xrange(len(data["mean_reward"])): 27 | totaltime += data["timesteps"][e] 28 | 29 | time_since += data["timesteps"][e] 30 | avg += data["mean_reward"][e] 31 | avgcount += 1 32 | 33 | if time_since > 10000: 34 | time_since = 0 35 | # totaltime += 1 36 | if i == 0: 37 | times[i].append(totaltime) 38 | else: 39 | times[i].append(totaltime) 40 | rewards[i].append(data["mean_reward"][e]) 41 | 42 | avg = 0 43 | avgcount = 0 44 | 45 | t.append(np.array(times[i])) 46 | r.append(np.array(rewards[i])) 47 | 48 | plt.plot(t[i],r[i],color=(1 - (i/5.0),i/5.0,1.0),label=names[i]) 49 | 50 | plt.xlabel("Environment Steps Seen") 51 | plt.ylabel("Average return") 52 | plt.legend(loc=4) 53 | plt.title("HalfCheetah-v1") 54 | plt.show() 55 | -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/fixed_steps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import 
matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Swimmer-v1-none-20000.000000-0.001000-0.000000-0.000000","Swimmer-v1-none-10000.000000-0.001000-0.000000-0.000000","Swimmer-v1-none-5000.000000-0.001000-0.000000-0.000000","Swimmer-v1-none-1500.000000-0.001000-0.000000-0.000000"] 12 | names = ["Fixed 20,000 steps", "Fixed 10,000 steps", "Fixed 5,000 steps", "Fixed 1,500 steps"] 13 | 14 | for i in xrange(len(trials)): 15 | with open(trials[i]) as data_file: 16 | data = json.load(data_file) 17 | 18 | times.append([]) 19 | rewards.append([]) 20 | totaltime = 0 21 | 22 | time_since = 0 23 | avg = 0 24 | avgcount = 0 25 | 26 | for e in xrange(len(data["mean_reward"])): 27 | totaltime += data["timesteps"][e] 28 | 29 | time_since += data["timesteps"][e] 30 | avg += data["mean_reward"][e] 31 | avgcount += 1 32 | 33 | if time_since > 10000 and totaltime < 6000000: 34 | time_since = 0 35 | # totaltime += 1 36 | if i == 0: 37 | times[i].append(totaltime) 38 | else: 39 | times[i].append(totaltime) 40 | rewards[i].append(data["mean_reward"][e]) 41 | 42 | avg = 0 43 | avgcount = 0 44 | 45 | t.append(np.array(times[i])) 46 | r.append(np.array(rewards[i])) 47 | 48 | plt.plot(t[i],r[i],color=(1 - (i/5.0),i/5.0,1.0),label=names[i]) 49 | 50 | plt.xlabel("Environment Steps Seen") 51 | plt.ylabel("Average return") 52 | plt.legend(loc=4) 53 | plt.title("Swimmer-v1") 54 | plt.show() 55 | -------------------------------------------------------------------------------- /trials.txt: -------------------------------------------------------------------------------- 1 | python main.py --task Reacher-v1 --decay_method adaptive-margin --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 0.0005 --timestep_adapt 300 2 | 3 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 0.0005 --timestep_adapt 300 4 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 20000 --max_kl 0.001 --kl_adapt 0.0005 --timestep_adapt 0 5 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 0 --timestep_adapt 300 6 | 7 | python main.py --task Reacher-v1 --decay_method none --timesteps_per_batch 20000 --max_kl 0.001 8 | python main.py --task Reacher-v1 --decay_method none --timesteps_per_batch 20000 --max_kl 0.005 9 | python main.py --task Reacher-v1 --decay_method none --timesteps_per_batch 20000 --max_kl 0.010 10 | 11 | # python main.py --task Reacher-v1 --decay_method none --timesteps_per_batch 20000 --max_kl 0.001 12 | python main.py --task Reacher-v1 --decay_method none --timesteps_per_batch 10000 --max_kl 0.001 13 | python main.py --task Reacher-v1 --decay_method none --timesteps_per_batch 5000 --max_kl 0.001 14 | python main.py --task Reacher-v1 --decay_method none --timesteps_per_batch 1500 --max_kl 0.001 15 | 16 | 17 | do all of these, and then repeat for Swimmer, Hopper, and HalfCheetah. 18 | 19 | 20 | Status so far: 21 | 22 | On reacher: lower steps and higher KL always best, and adapting both works 23 | On halfcheetah: lower steps better but more variation, higher KL always best, adapting both crashes, adapting KL is good 24 | On swimmer: midground best for steps/KL, adapting both crashes, adapting steps is good 25 | 26 | try: adapting with a margin of 10%? you need to get higher than 10% avg to adapt. 27 | this should let the numbers hold in the middle (for ex. 
in swimmer env) 28 | -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/compare-nomargin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Swimmer-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 12 | "Swimmer-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 13 | "Swimmer-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 14 | "Swimmer-v1-none-5000.000000-0.001000-0.000000-0.000000", 15 | "Swimmer-v1-none-20000.000000-0.005000-0.000000-0.000000", 16 | "Swimmer-v1-none-20000.000000-0.001000-0.000000-0.000000"] 17 | 18 | names = ["Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (5,000)", "Optimal KL (0.005)", "Original steps/KL"] 19 | for i in xrange(len(trials)): 20 | with open(trials[i]) as data_file: 21 | data = json.load(data_file) 22 | 23 | times.append([]) 24 | rewards.append([]) 25 | totaltime = 0 26 | 27 | time_since = 0 28 | avg = 0 29 | avgcount = 0 30 | 31 | for e in xrange(len(data["mean_reward"])): 32 | totaltime += data["timesteps"][e] 33 | 34 | time_since += data["timesteps"][e] 35 | avg += data["mean_reward"][e] 36 | avgcount += 1 37 | 38 | if time_since > 10000: 39 | time_since = 0 40 | # totaltime += 1 41 | if i == 0: 42 | times[i].append(totaltime) 43 | else: 44 | times[i].append(totaltime) 45 | rewards[i].append(avg/avgcount) 46 | 47 | avg = 0 48 | avgcount = 0 49 | 50 | t.append(np.array(times[i])) 51 | r.append(np.array(rewards[i])) 52 | 53 | plt.plot(t[i],r[i],color=(1 - (i/7.0),i/7.0,1.0),label=names[i]) 54 | 55 | plt.xlabel("Environment Steps Seen") 56 | plt.ylabel("Average return") 57 | plt.legend(loc=4) 58 | plt.title("Swimmer-v1") 59 | plt.show() 60 | -------------------------------------------------------------------------------- /results/vs_fixed/reacher/compare-nomargin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Reacher-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 12 | "Reacher-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 13 | "Reacher-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 14 | "Reacher-v1-none-1500.000000-0.001000-0.000000-0.000000", 15 | "Reacher-v1-none-20000.000000-0.010000-0.000000-0.000000", 16 | "Reacher-v1-none-20000.000000-0.001000-0.000000-0.000000"] 17 | names = ["Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (1500)", "Optimal KL (0.01)","Orginal steps/KL"] 18 | for i in xrange(len(trials)): 19 | with open(trials[i]) as data_file: 20 | data = json.load(data_file) 21 | 22 | times.append([]) 23 | rewards.append([]) 24 | totaltime = 0 25 | 26 | time_since = 0 27 | avg = 0 28 | avgcount = 0 29 | 30 | for e in xrange(len(data["mean_reward"])): 31 | totaltime += data["timesteps"][e] 32 | 33 | time_since += data["timesteps"][e] 34 | avg += data["mean_reward"][e] 35 | avgcount += 1 36 | 37 | if time_since > 10000 and totaltime < 4000000: 38 | time_since = 0 39 | # totaltime += 1 40 | if i == 0: 41 | times[i].append(totaltime) 42 | else: 43 | times[i].append(totaltime) 44 | rewards[i].append(avg/avgcount) 45 | 46 | avg = 0 47 | avgcount = 0 48 | 49 | t.append(np.array(times[i])) 50 | r.append(np.array(rewards[i])) 51 | 52 | 
plt.plot(t[i],r[i],color=(1 - (i/5.0),i/5.0,1.0),label=names[i]) 53 | 54 | plt.xlabel("Environment Steps Seen") 55 | plt.ylabel("Average return") 56 | plt.legend(loc=4) 57 | plt.title("Reacher-v1") 58 | plt.show() 59 | -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah/compare-nomargin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["HalfCheetah-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 12 | "HalfCheetah-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 13 | "HalfCheetah-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 14 | "HalfCheetah-v1-none-1500.000000-0.001000-0.000000-0.000000", 15 | "HalfCheetah-v1-none-20000.000000-0.010000-0.000000-0.000000", 16 | "HalfCheetah-v1-none-20000.000000-0.001000-0.000000-0.000000"] 17 | 18 | names = ["Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (1500)", "Optimal KL (0.01)", "Original steps/KL"] 19 | for i in xrange(len(trials)): 20 | with open(trials[i]) as data_file: 21 | data = json.load(data_file) 22 | 23 | times.append([]) 24 | rewards.append([]) 25 | totaltime = 0 26 | 27 | time_since = 0 28 | avg = 0 29 | avgcount = 0 30 | 31 | for e in xrange(len(data["mean_reward"])): 32 | totaltime += data["timesteps"][e] 33 | 34 | time_since += data["timesteps"][e] 35 | avg += data["mean_reward"][e] 36 | avgcount += 1 37 | 38 | if time_since > 20000 and totaltime < 10000000: 39 | time_since = 0 40 | # totaltime += 1 41 | if i == 0: 42 | times[i].append(totaltime) 43 | else: 44 | times[i].append(totaltime) 45 | rewards[i].append(avg/avgcount) 46 | 47 | avg = 0 48 | avgcount = 0 49 | 50 | t.append(np.array(times[i])) 51 | r.append(np.array(rewards[i])) 52 | 53 | plt.plot(t[i],r[i],label=names[i]) 54 | 55 | plt.xlabel("Environment Steps Seen") 56 | plt.ylabel("Average return") 57 | leg = plt.legend(loc=4) 58 | for legobj in leg.legendHandles: 59 | legobj.set_linewidth(2.0) 60 | plt.title("HalfCheetah-v1") 61 | plt.show() 62 | -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Swimmer-v1-adaptive-margin-1000.000000-0.001000-300.000000-0.000500", 12 | "Swimmer-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 13 | "Swimmer-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 14 | "Swimmer-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 15 | "Swimmer-v1-none-5000.000000-0.001000-0.000000-0.000000", 16 | "Swimmer-v1-none-20000.000000-0.005000-0.000000-0.000000", 17 | "Swimmer-v1-none-20000.000000-0.001000-0.000000-0.000000"] 18 | 19 | names = ["Adapt both w/ margin","Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (5,000)", "Optimal KL (0.005)", "Original steps/KL"] 20 | for i in xrange(len(trials)): 21 | with open(trials[i]) as data_file: 22 | data = json.load(data_file) 23 | 24 | times.append([]) 25 | rewards.append([]) 26 | totaltime = 0 27 | 28 | time_since = 0 29 | avg = 0 30 | avgcount = 0 31 | 32 | for e in xrange(len(data["mean_reward"])): 33 | totaltime += data["timesteps"][e] 34 | 35 | time_since += data["timesteps"][e] 36 | avg += 
data["timesteps"][e] 37 | avgcount += 1 38 | 39 | if time_since > 10000: 40 | time_since = 0 41 | # totaltime += 1 42 | if i == 0: 43 | times[i].append(totaltime) 44 | else: 45 | times[i].append(totaltime) 46 | rewards[i].append(avg/avgcount) 47 | 48 | avg = 0 49 | avgcount = 0 50 | 51 | t.append(np.array(times[i])) 52 | r.append(np.array(rewards[i])) 53 | 54 | lin, = plt.plot(t[i],r[i],label=names[i]) 55 | # if i == 0: 56 | # lin.remove() 57 | 58 | plt.xlabel("Environment Steps Seen") 59 | plt.ylabel("Average return") 60 | leg = plt.legend(loc=4) 61 | for legobj in leg.legendHandles: 62 | legobj.set_linewidth(2.0) 63 | plt.title("Swimmer-v1") 64 | plt.show() 65 | -------------------------------------------------------------------------------- /results/vs_fixed/swimmer2/compare.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Swimmer-v1-adaptive-margin-1000.000000-0.001000-300.000000-0.000500", 12 | "Swimmer-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 13 | "Swimmer-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 14 | "Swimmer-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 15 | "Swimmer-v1-none-5000.000000-0.001000-0.000000-0.000000", 16 | "Swimmer-v1-none-20000.000000-0.005000-0.000000-0.000000", 17 | "Swimmer-v1-none-20000.000000-0.001000-0.000000-0.000000"] 18 | 19 | names = ["Adapt both w/ margin","Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (5,000)", "Optimal KL (0.005)", "Original steps/KL"] 20 | for i in xrange(len(trials)): 21 | with open(trials[i]) as data_file: 22 | data = json.load(data_file) 23 | 24 | times.append([]) 25 | rewards.append([]) 26 | totaltime = 0 27 | 28 | time_since = 0 29 | avg = 0 30 | avgcount = 0 31 | 32 | for e in xrange(len(data["mean_reward"])): 33 | totaltime += data["timesteps"][e] 34 | 35 | time_since += data["timesteps"][e] 36 | avg += data["mean_reward"][e] 37 | avgcount += 1 38 | 39 | if time_since > 10000: 40 | time_since = 0 41 | # totaltime += 1 42 | if i == 0: 43 | times[i].append(totaltime) 44 | else: 45 | times[i].append(totaltime) 46 | rewards[i].append(avg/avgcount) 47 | 48 | avg = 0 49 | avgcount = 0 50 | 51 | t.append(np.array(times[i])) 52 | r.append(np.array(rewards[i])) 53 | 54 | lin, = plt.plot(t[i],r[i],label=names[i]) 55 | if i == 0: 56 | lin.remove() 57 | 58 | plt.xlabel("Environment Steps Seen") 59 | plt.ylabel("Average return") 60 | leg = plt.legend(loc=4) 61 | for legobj in leg.legendHandles: 62 | legobj.set_linewidth(2.0) 63 | plt.title("Swimmer-v1") 64 | plt.show() 65 | -------------------------------------------------------------------------------- /results/vs_fixed/swimmer/compare.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Swimmer-v1-adaptive-margin-1000.000000-0.001000-300.000000-0.000500", 12 | "Swimmer-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 13 | "Swimmer-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 14 | "Swimmer-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 15 | "Swimmer-v1-none-5000.000000-0.001000-0.000000-0.000000", 16 | "Swimmer-v1-none-20000.000000-0.005000-0.000000-0.000000", 17 | "Swimmer-v1-none-20000.000000-0.001000-0.000000-0.000000"] 18 
| 19 | names = ["Adapt both w/ margin","Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (5,000)", "Optimal KL (0.005)", "Original steps/KL"] 20 | for i in xrange(len(trials)): 21 | with open(trials[i]) as data_file: 22 | data = json.load(data_file) 23 | 24 | times.append([]) 25 | rewards.append([]) 26 | totaltime = 0 27 | 28 | time_since = 0 29 | avg = 0 30 | avgcount = 0 31 | 32 | for e in xrange(len(data["mean_reward"])): 33 | totaltime += data["timesteps"][e] 34 | 35 | time_since += data["timesteps"][e] 36 | avg += data["mean_reward"][e] 37 | avgcount += 1 38 | 39 | if time_since > 10000: 40 | time_since = 0 41 | # totaltime += 1 42 | if i == 0: 43 | times[i].append(totaltime) 44 | else: 45 | times[i].append(totaltime) 46 | rewards[i].append(avg/avgcount) 47 | 48 | avg = 0 49 | avgcount = 0 50 | 51 | t.append(np.array(times[i])) 52 | r.append(np.array(rewards[i])) 53 | 54 | lin, = plt.plot(t[i],r[i],label=names[i]) 55 | # if i == 0: 56 | # lin.remove() 57 | 58 | plt.xlabel("Environment Steps Seen") 59 | plt.ylabel("Average return") 60 | leg = plt.legend(loc=4) 61 | for legobj in leg.legendHandles: 62 | legobj.set_linewidth(2.0) 63 | plt.title("Swimmer-v1") 64 | plt.show() 65 | -------------------------------------------------------------------------------- /results/vs_fixed/reacher/compare.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Reacher-v1-adaptive-margin-1000.000000-0.001000-300.000000-0.000500", 12 | "Reacher-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 13 | "Reacher-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 14 | "Reacher-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 15 | "Reacher-v1-none-1500.000000-0.001000-0.000000-0.000000", 16 | "Reacher-v1-none-20000.000000-0.010000-0.000000-0.000000", 17 | "Reacher-v1-none-20000.000000-0.001000-0.000000-0.000000"] 18 | names = ["Adapt both w/ margin","Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (1500)", "Optimal KL (0.01)","Orginal steps/KL"] 19 | for i in xrange(len(trials)): 20 | with open(trials[i]) as data_file: 21 | data = json.load(data_file) 22 | 23 | times.append([]) 24 | rewards.append([]) 25 | totaltime = 0 26 | 27 | time_since = 0 28 | avg = 0 29 | avgcount = 0 30 | 31 | for e in xrange(len(data["mean_reward"])): 32 | totaltime += data["timesteps"][e] 33 | 34 | time_since += data["timesteps"][e] 35 | avg += data["mean_reward"][e] 36 | avgcount += 1 37 | 38 | if time_since > 10000 and totaltime < 4000000: 39 | time_since = 0 40 | # totaltime += 1 41 | if i == 0: 42 | times[i].append(totaltime) 43 | else: 44 | times[i].append(totaltime) 45 | rewards[i].append(avg/avgcount) 46 | 47 | avg = 0 48 | avgcount = 0 49 | 50 | t.append(np.array(times[i])) 51 | r.append(np.array(rewards[i])) 52 | 53 | lin, = plt.plot(t[i],r[i],label=names[i]) 54 | if i == 0: 55 | lin.remove() 56 | 57 | 58 | plt.xlabel("Environment Steps Seen") 59 | plt.ylabel("Average return") 60 | leg = plt.legend(loc=4) 61 | for legobj in leg.legendHandles: 62 | legobj.set_linewidth(2.0) 63 | plt.title("Reacher-v1") 64 | plt.show() 65 | -------------------------------------------------------------------------------- /results/vs_fixed/reacher2/compare.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import 
sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["Reacher-v1-adaptive-margin-1000.000000-0.001000-300.000000-0.000500", 12 | "Reacher-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 13 | "Reacher-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 14 | "Reacher-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 15 | "Reacher-v1-none-1500.000000-0.001000-0.000000-0.000000", 16 | "Reacher-v1-none-20000.000000-0.010000-0.000000-0.000000", 17 | "Reacher-v1-none-20000.000000-0.001000-0.000000-0.000000"] 18 | names = ["Adapt both w/ margin","Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (1500)", "Optimal KL (0.01)","Orginal steps/KL"] 19 | for i in xrange(len(trials)): 20 | with open(trials[i]) as data_file: 21 | data = json.load(data_file) 22 | 23 | times.append([]) 24 | rewards.append([]) 25 | totaltime = 0 26 | 27 | time_since = 0 28 | avg = 0 29 | avgcount = 0 30 | 31 | for e in xrange(len(data["mean_reward"])): 32 | totaltime += data["timesteps"][e] 33 | 34 | time_since += data["timesteps"][e] 35 | avg += data["mean_reward"][e] 36 | avgcount += 1 37 | 38 | if time_since > 10000 and totaltime < 4000000: 39 | time_since = 0 40 | # totaltime += 1 41 | if i == 0: 42 | times[i].append(totaltime) 43 | else: 44 | times[i].append(totaltime) 45 | rewards[i].append(avg/avgcount) 46 | 47 | avg = 0 48 | avgcount = 0 49 | 50 | t.append(np.array(times[i])) 51 | r.append(np.array(rewards[i])) 52 | 53 | lin, = plt.plot(t[i],r[i],label=names[i]) 54 | # if i == 0: 55 | # lin.remove() 56 | 57 | 58 | plt.xlabel("Environment Steps Seen") 59 | plt.ylabel("Average return") 60 | leg = plt.legend(loc=4) 61 | for legobj in leg.legendHandles: 62 | legobj.set_linewidth(2.0) 63 | plt.title("Reacher-v1") 64 | plt.show() 65 | -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah2/compare.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["HalfCheetah-v1-adaptive-margin-1000.000000-0.001000-300.000000-0.000500", 12 | "HalfCheetah-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 13 | "HalfCheetah-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 14 | "HalfCheetah-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 15 | "HalfCheetah-v1-none-1500.000000-0.001000-0.000000-0.000000", 16 | "HalfCheetah-v1-none-20000.000000-0.010000-0.000000-0.000000", 17 | "HalfCheetah-v1-none-20000.000000-0.001000-0.000000-0.000000"] 18 | 19 | names = ["Adapt both w/ margin","Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (1500)", "Optimal KL (0.01)", "Original steps/KL"] 20 | for i in xrange(len(trials)): 21 | with open(trials[i]) as data_file: 22 | data = json.load(data_file) 23 | 24 | times.append([]) 25 | rewards.append([]) 26 | totaltime = 0 27 | 28 | time_since = 0 29 | avg = 0 30 | avgcount = 0 31 | 32 | for e in xrange(len(data["mean_reward"])): 33 | totaltime += data["timesteps"][e] 34 | 35 | time_since += data["timesteps"][e] 36 | avg += data["mean_reward"][e] 37 | avgcount += 1 38 | 39 | if time_since > 20000 and totaltime < 10000000: 40 | time_since = 0 41 | # totaltime += 1 42 | if i == 0: 43 | times[i].append(totaltime) 44 | else: 45 | times[i].append(totaltime) 46 | rewards[i].append(avg/avgcount) 47 | 48 | avg = 0 49 | avgcount = 0 50 | 51 | t.append(np.array(times[i])) 52 | 
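    # Note on the binning loop above: each entry of times[i] is the cumulative
    # environment-step count at the end of a ~20,000-step bin (capped at 10M total
    # steps), and the matching entry of rewards[i] is the average mean_reward over
    # the iterations that fell into that bin. They are converted to numpy arrays
    # here purely for plotting.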
r.append(np.array(rewards[i])) 53 | 54 | lin, = plt.plot(t[i],r[i],label=names[i]) 55 | if i == 0: 56 | lin.remove() 57 | 58 | plt.xlabel("Environment Steps Seen") 59 | plt.ylabel("Average return") 60 | leg = plt.legend(loc=4) 61 | for legobj in leg.legendHandles: 62 | legobj.set_linewidth(2.0) 63 | plt.title("HalfCheetah-v1") 64 | plt.show() 65 | -------------------------------------------------------------------------------- /results/vs_fixed/halfcheetah/compare.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import json 4 | import sys 5 | 6 | times = [] 7 | rewards = [] 8 | t = [] 9 | r = [] 10 | 11 | trials = ["HalfCheetah-v1-adaptive-margin-1000.000000-0.001000-300.000000-0.000500", 12 | "HalfCheetah-v1-adaptive-1000.000000-0.001000-300.000000-0.000500", 13 | "HalfCheetah-v1-adaptive-1000.000000-0.001000-300.000000-0.000000", 14 | "HalfCheetah-v1-adaptive-20000.000000-0.001000-0.000000-0.000500", 15 | "HalfCheetah-v1-none-1500.000000-0.001000-0.000000-0.000000", 16 | "HalfCheetah-v1-none-20000.000000-0.010000-0.000000-0.000000", 17 | "HalfCheetah-v1-none-20000.000000-0.001000-0.000000-0.000000"] 18 | 19 | names = ["Adapt both w/ margin","Adapt both", "Adapt steps", "Adapt KL", "Optimal steps (1500)", "Optimal KL (0.01)", "Original steps/KL"] 20 | for i in xrange(len(trials)): 21 | with open(trials[i]) as data_file: 22 | data = json.load(data_file) 23 | 24 | times.append([]) 25 | rewards.append([]) 26 | totaltime = 0 27 | 28 | time_since = 0 29 | avg = 0 30 | avgcount = 0 31 | 32 | for e in xrange(len(data["mean_reward"])): 33 | totaltime += data["timesteps"][e] 34 | 35 | time_since += data["timesteps"][e] 36 | avg += data["mean_reward"][e] 37 | avgcount += 1 38 | 39 | if time_since > 20000 and totaltime < 10000000: 40 | time_since = 0 41 | # totaltime += 1 42 | if i == 0: 43 | times[i].append(totaltime) 44 | else: 45 | times[i].append(totaltime) 46 | rewards[i].append(avg/avgcount) 47 | 48 | avg = 0 49 | avgcount = 0 50 | 51 | t.append(np.array(times[i])) 52 | r.append(np.array(rewards[i])) 53 | 54 | lin, = plt.plot(t[i],r[i],label=names[i]) 55 | # if i == 0: 56 | # lin.remove() 57 | 58 | plt.xlabel("Environment Steps Seen") 59 | plt.ylabel("Average return") 60 | leg = plt.legend(loc=4) 61 | for legobj in leg.legendHandles: 62 | legobj.set_linewidth(2.0) 63 | plt.title("HalfCheetah-v1") 64 | plt.show() 65 | -------------------------------------------------------------------------------- /trials_old.txt: -------------------------------------------------------------------------------- 1 | python main.py --task Reacher-v1 --decay_method linear --timesteps_per_batch 10000 --max_kl 0.01 --kl_adapt 0.001 --timestep_adapt 0 2 | python main.py --task Reacher-v1 --decay_method linear --timesteps_per_batch 10000 --max_kl 0.01 --kl_adapt 0.0001 --timestep_adapt 0 3 | python main.py --task Reacher-v1 --decay_method linear --timesteps_per_batch 10000 --max_kl 0.01 --kl_adapt 0.00001 --timestep_adapt 0 4 | python main.py --task Reacher-v1 --decay_method linear --timesteps_per_batch 10000 --max_kl 0.01 --kl_adapt 0.000001 --timestep_adapt 0 5 | 6 | python main.py --task Reacher-v1 --decay_method linear --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 0 --timestep_adapt 20 7 | python main.py --task Reacher-v1 --decay_method linear --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 0 --timestep_adapt 100 8 | python main.py --task Reacher-v1 --decay_method linear --timesteps_per_batch 1000 --max_kl 
0.001 --kl_adapt 0 --timestep_adapt 500 9 | 10 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 10000 --max_kl 0.001 --kl_adapt 0.0001 --timestep_adapt 0 11 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 10000 --max_kl 0.001 --kl_adapt 0.00001 --timestep_adapt 0 12 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 10000 --max_kl 0.001 --kl_adapt 0.000001 --timestep_adapt 0 13 | 14 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 0 --timestep_adapt 20 15 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 0 --timestep_adapt 100 16 | python main.py --task Reacher-v1 --decay_method adaptive --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 0 --timestep_adapt 500 17 | 18 | do all of these, and then repeat for Swimmer, Hopper, and HalfCheetah. 19 | 20 | 21 | 22 | 23 | Status 24 | Reacher (n_iters 305): 25 | linear KL done 26 | linear steps done 27 | adaptive steps done 28 | HalfCheetah (n_iters 2005): 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | # python main.py --task Reacher-v1 --decay_method exponential --timesteps_per_batch 10000 --max_kl 0.01 --kl_adapt 0.9 --timestep_adapt 1 39 | # python main.py --task Reacher-v1 --decay_method exponential --timesteps_per_batch 10000 --max_kl 0.01 --kl_adapt 0.99 --timestep_adapt 1 40 | # python main.py --task Reacher-v1 --decay_method exponential --timesteps_per_batch 10000 --max_kl 0.01 --kl_adapt 0.999 --timestep_adapt 1 41 | # 42 | # python main.py --task Reacher-v1 --decay_method exponential --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 1 --timestep_adapt 1.001 43 | # python main.py --task Reacher-v1 --decay_method exponential --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 1 --timestep_adapt 1.01 44 | # python main.py --task Reacher-v1 --decay_method exponential --timesteps_per_batch 1000 --max_kl 0.001 --kl_adapt 1 --timestep_adapt 1.1 45 | -------------------------------------------------------------------------------- /value_function.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from utils import * 4 | 5 | class VF(object): 6 | coeffs = None 7 | 8 | def __init__(self, session): 9 | self.net = None 10 | self.session = session 11 | 12 | def create_net(self, shape): 13 | hidden_size = 64 14 | print(shape) 15 | self.x = tf.placeholder(tf.float32, shape=[None, shape], name="x") 16 | self.y = tf.placeholder(tf.float32, shape=[None], name="y") 17 | 18 | weight_init = tf.random_uniform_initializer(-0.05, 0.05) 19 | bias_init = tf.constant_initializer(0) 20 | 21 | with tf.variable_scope("VF"): 22 | h1 = tf.nn.relu(fully_connected(self.x, shape, hidden_size, weight_init, bias_init, "h1")) 23 | h2 = tf.nn.relu(fully_connected(h1, hidden_size, hidden_size, weight_init, bias_init, "h2")) 24 | h3 = fully_connected(h2, hidden_size, 1, weight_init, bias_init, "h3") 25 | self.net = tf.reshape(h3, (-1,)) 26 | l2 = tf.nn.l2_loss(self.net - self.y) 27 | self.train = tf.train.AdamOptimizer().minimize(l2) 28 | self.session.run(tf.initialize_all_variables()) 29 | 30 | 31 | def _features(self, path): 32 | o = path["obs"].astype('float32') 33 | o = o.reshape(o.shape[0], -1) 34 | act = path["action_dists"].astype('float32') 35 | l = len(path["rewards"]) 36 | al = np.arange(l).reshape(-1, 1) / 10.0 37 | ret = np.concatenate([o, act, al, 
np.ones((l, 1))], axis=1) 38 | return ret 39 | 40 | def fit(self, paths): 41 | featmat = np.concatenate([self._features(path) for path in paths]) 42 | if self.net is None: 43 | self.create_net(featmat.shape[1]) 44 | returns = np.concatenate([path["returns"] for path in paths]) 45 | for _ in range(50): 46 | self.session.run(self.train, {self.x: featmat, self.y: returns}) 47 | 48 | def predict(self, path): 49 | if self.net is None: 50 | return np.zeros(len(path["rewards"])) 51 | else: 52 | ret = self.session.run(self.net, {self.x: self._features(path)}) 53 | return np.reshape(ret, (ret.shape[0], )) 54 | 55 | 56 | class LinearVF(object): 57 | coeffs = None 58 | 59 | def _features(self, path): 60 | o = path["obs"].astype('float32') 61 | o = o.reshape(o.shape[0], -1) 62 | l = len(path["rewards"]) 63 | al = np.arange(l).reshape(-1, 1) / 100.0 64 | return np.concatenate([o, o**2, al, al**2, np.ones((l, 1))], axis=1) 65 | 66 | def fit(self, paths): 67 | featmat = np.concatenate([self._features(path) for path in paths]) 68 | returns = np.concatenate([path["returns"] for path in paths]) 69 | n_col = featmat.shape[1] 70 | lamb = 2.0 71 | self.coeffs = np.linalg.lstsq(featmat.T.dot(featmat) + lamb * np.identity(n_col), featmat.T.dot(returns))[0] 72 | 73 | def predict(self, path): 74 | return np.zeros(len(path["rewards"])) if self.coeffs is None else self._features( 75 | path).dot(self.coeffs) 76 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import gym 4 | from utils import * 5 | from model import * 6 | import argparse 7 | from rollouts import * 8 | import json 9 | 10 | 11 | 12 | parser = argparse.ArgumentParser(description='TRPO.') 13 | # these parameters should stay the same 14 | parser.add_argument("--task", type=str, default='Reacher-v1') 15 | parser.add_argument("--timesteps_per_batch", type=int, default=10000) 16 | parser.add_argument("--n_steps", type=int, default=6000000) 17 | parser.add_argument("--gamma", type=float, default=.99) 18 | parser.add_argument("--max_kl", type=float, default=.001) 19 | parser.add_argument("--cg_damping", type=float, default=1e-3) 20 | parser.add_argument("--num_threads", type=int, default=5) 21 | parser.add_argument("--monitor", type=bool, default=False) 22 | 23 | # change these parameters for testing 24 | parser.add_argument("--decay_method", type=str, default="adaptive") # adaptive, none 25 | parser.add_argument("--timestep_adapt", type=int, default=0) 26 | parser.add_argument("--kl_adapt", type=float, default=0) 27 | 28 | args = parser.parse_args() 29 | args.max_pathlength = gym.spec(args.task).timestep_limit 30 | 31 | learner_tasks = multiprocessing.JoinableQueue() 32 | learner_results = multiprocessing.Queue() 33 | learner_env = gym.make(args.task) 34 | 35 | learner = TRPO(args, learner_env.observation_space, learner_env.action_space, learner_tasks, learner_results) 36 | learner.start() 37 | rollouts = ParallelRollout(args) 38 | 39 | learner_tasks.put(1) 40 | learner_tasks.join() 41 | starting_weights = learner_results.get() 42 | rollouts.set_policy_weights(starting_weights) 43 | 44 | start_time = time.time() 45 | history = {} 46 | history["rollout_time"] = [] 47 | history["learn_time"] = [] 48 | history["mean_reward"] = [] 49 | history["timesteps"] = [] 50 | 51 | # start it off with a big negative number 52 | last_reward = -1000000 53 | recent_total_reward = 0 54 | 55 | 
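# Added for clarity (not in the original file): the training loop below appends to
# history["maxkl"] every iteration, but the key is never created above, which would
# raise a KeyError on the first pass. Initialize it alongside the other per-iteration logs.
history["maxkl"] = []

# The adaptive schedules below operate on 10-iteration windows: every 10 iterations the
# summed mean reward of the window is compared with the previous window, and
# timesteps_per_batch / max_kl are nudged in opposite directions, clamped to roughly
# 1,200-20,000 steps per batch (10,000 in the margin variant) and a max KL of 0.001-0.01.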
totalsteps = 0; 56 | 57 | starting_timesteps = args.timesteps_per_batch 58 | starting_kl = args.max_kl 59 | 60 | iteration = 0 61 | while True: 62 | iteration += 1; 63 | 64 | # runs a bunch of async processes that collect rollouts 65 | rollout_start = time.time() 66 | paths = rollouts.rollout() 67 | rollout_time = (time.time() - rollout_start) / 60.0 68 | 69 | # Why is the learner in an async process? 70 | # Well, it turns out tensorflow has an issue: when there's a tf.Session in the main thread 71 | # and an async process creates another tf.Session, it will freeze up. 72 | # To solve this, we just make the learner's tf.Session in its own async process, 73 | # and wait until the learner's done before continuing the main thread. 74 | learn_start = time.time() 75 | learner_tasks.put((2,args.max_kl)) 76 | learner_tasks.put(paths) 77 | learner_tasks.join() 78 | new_policy_weights, mean_reward = learner_results.get() 79 | learn_time = (time.time() - learn_start) / 60.0 80 | print "-------- Iteration %d ----------" % iteration 81 | print "Total time: %.2f mins" % ((time.time() - start_time) / 60.0) 82 | 83 | history["rollout_time"].append(rollout_time) 84 | history["learn_time"].append(learn_time) 85 | history["mean_reward"].append(mean_reward) 86 | history["timesteps"].append(args.timesteps_per_batch) 87 | history["maxkl"].append(args.max_kl) 88 | 89 | recent_total_reward += mean_reward 90 | 91 | if args.decay_method == "adaptive": 92 | if iteration % 10 == 0: 93 | if recent_total_reward < last_reward: 94 | print "Policy is not improving. Decrease KL and increase steps." 95 | if args.timesteps_per_batch < 20000: 96 | args.timesteps_per_batch += args.timestep_adapt 97 | if args.max_kl > 0.001: 98 | args.max_kl -= args.kl_adapt 99 | else: 100 | print "Policy is improving. Increase KL and decrease steps." 101 | if args.timesteps_per_batch > 1200: 102 | args.timesteps_per_batch -= args.timestep_adapt 103 | if args.max_kl < 0.01: 104 | args.max_kl += args.kl_adapt 105 | last_reward = recent_total_reward 106 | recent_total_reward = 0 107 | 108 | 109 | if args.decay_method == "adaptive-margin": 110 | if iteration % 10 == 0: 111 | scaled_last = last_reward + abs(last_reward * 0.05) 112 | print "Last reward: %f Scaled: %f Recent: %f" % (last_reward, scaled_last, recent_total_reward) 113 | if recent_total_reward < scaled_last: 114 | print "Policy is not improving. Decrease KL and increase steps." 115 | if args.timesteps_per_batch < 10000: 116 | args.timesteps_per_batch += args.timestep_adapt 117 | if args.max_kl > 0.001: 118 | args.max_kl -= args.kl_adapt 119 | else: 120 | print "Policy is improving. Increase KL and decrease steps." 
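                # "Improving" here means the 10-iteration reward sum beat the previous
                # window by at least the 5% margin computed above, so the schedule gets
                # more aggressive: shrink the batch (never below 1,200 steps) and raise
                # max_kl (never above 0.01).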
121 | if args.timesteps_per_batch > 1200: 122 | args.timesteps_per_batch -= args.timestep_adapt 123 | if args.max_kl < 0.01: 124 | args.max_kl += args.kl_adapt 125 | last_reward = recent_total_reward 126 | recent_total_reward = 0 127 | 128 | print "Current steps is " + str(args.timesteps_per_batch) + " and KL is " + str(args.max_kl) 129 | 130 | if iteration % 100 == 0: 131 | with open("%s-%s-%f-%f-%f-%f" % (args.task, args.decay_method, starting_timesteps, starting_kl, args.timestep_adapt, args.kl_adapt), "w") as outfile: 132 | json.dump(history,outfile) 133 | 134 | totalsteps += args.timesteps_per_batch 135 | print "%d total steps have happened" % totalsteps 136 | if totalsteps > args.n_steps: 137 | break 138 | 139 | rollouts.set_policy_weights(new_policy_weights) 140 | 141 | rollouts.end() 142 | -------------------------------------------------------------------------------- /results/try1/Hopper-v1-5: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.04108755191167195, 0.04093036651611328, 0.04006659984588623, 0.04395896593729655, 0.04768971602121989, 0.048150082429250084, 0.07136038541793824, 0.06282924811045329, 0.05405436754226685, 0.061475936571757, 0.07490971485773722, 0.07675565083821614, 0.08641124963760376, 0.09087656736373902, 0.09883630275726318, 0.1024884025255839, 0.10510683457056681, 0.1138442317644755, 0.120480748017629, 0.1261780818303426, 0.12543433507283527, 0.1401491681734721, 0.14445453484853107, 0.14606601397196453, 0.14885736703872682, 0.15577868223190308, 0.16228538354237873, 0.16107167005538942, 0.17063826719919842, 0.1666956146558126, 0.1731978972752889, 0.17490250269571941, 0.17825283606847128, 0.17868980169296264, 0.1839186986287435, 0.18753236532211304, 0.18730825185775757, 0.1891059994697571, 0.19353196620941163, 0.197840150197347, 0.2048476020495097, 0.2140971342722575, 0.22146061658859253, 0.22614134947458903, 0.23215205272038777, 0.2331818660100301, 0.24118595123291015, 0.2524975180625916, 0.2614416003227234, 0.263251264890035, 0.2737729827562968, 0.2874451001485189, 0.2961222330729167, 0.30639108419418337, 0.31245131492614747, 0.30933473507563275, 0.3178049802780151, 0.32927449941635134, 0.3285308519999186, 0.33846513032913206, 0.3392813324928284, 0.34860658645629883, 0.3470502813657125, 0.35664663314819334, 0.35595670143763225, 0.35661176840464276, 0.3571766495704651, 0.35620661576588947, 0.3622542341550191, 0.3593873977661133, 0.36452320019404094, 0.3590109825134277, 0.36783369779586794, 0.36795759995778404, 0.36855716705322267, 0.37216861645380656, 0.3722942352294922, 0.3745449662208557, 0.3711693485577901, 0.37308470010757444, 0.379359233379364, 0.37820643186569214, 0.3793450673421224, 0.37720348437627155, 0.3802517334620158, 0.3842138846715291, 0.38446034987767536, 0.3896413008371989, 0.38154921531677244, 0.38553386926651, 0.3901678164800008, 0.3907724340756734, 0.39396716753641764, 0.38943368196487427, 0.39154026508331297, 0.39591633478800453, 0.3957304318745931, 0.3993092656135559, 0.4029371698697408, 0.40168631871541344, 0.40757246414820353], "mean_reward": [16.309743577580686, 18.450459028010176, 15.897150391672231, 20.328343708120915, 23.787116109771308, 21.681332720137206, 31.679443934719348, 28.680504641903699, 34.673186275298434, 45.232270259859327, 61.361804016586667, 64.632810341662676, 81.696665135764391, 87.632761417356093, 98.791699179932152, 107.60562286769117, 110.67322072447845, 124.22985327503369, 135.35857854488532, 143.95245438234727, 143.63268258867495, 161.68638865157681, 
171.11532881630808, 172.36100569229563, 178.06138288097603, 189.29960175303066, 201.96686903089591, 199.80908684658007, 215.00201283838976, 210.85121968832433, 220.73564847952665, 224.7143517914071, 229.72911969153611, 234.37648324289768, 240.42153256761853, 250.75224911250817, 252.10337967365453, 256.12762676489882, 262.56803630154138, 275.5680663010009, 289.38705958210272, 304.35959062827919, 320.69679366019028, 331.83631132996049, 341.45445492058423, 348.09681389235925, 360.86844049015787, 381.40507271555549, 402.49182902324702, 412.04499411594935, 434.6318423294498, 462.30020820635684, 474.82940591939405, 496.19870376880613, 506.04585808325328, 503.98966115586938, 528.17204602402558, 548.61802245009358, 550.97513689782465, 571.63414871278519, 576.23375715823659, 592.73209333758507, 593.22294908154208, 612.61944949586768, 613.97760539875583, 617.88338961367288, 619.88734929323823, 618.15590247581508, 630.83365944633886, 630.36846519985022, 641.86574819099542, 633.68740537881854, 652.52366254005426, 656.77517396477424, 654.71818590914188, 659.38712736994682, 665.6852786591345, 673.6940113970627, 667.29064238827505, 675.84723252135825, 684.81268247206344, 688.41176361816065, 691.65976977433297, 691.64020316268181, 698.75696182789898, 702.48249835416641, 706.46620856271966, 715.86610440813877, 704.98366365755237, 715.84027759179003, 724.0962683117267, 730.44468774464826, 736.40758660783615, 729.10701888590813, 737.99052784319804, 749.76405211056465, 748.77913647471053, 755.70698920108202, 765.15917171406863, 767.32315311424065, 778.12970686965741], "learn_time": [0.013489516576131184, 0.012219667434692383, 0.012485718727111817, 0.013231937090555828, 0.014212950070699056, 0.01745338042577108, 0.019399031003316244, 0.016333083311716717, 0.015884268283843993, 0.018063835302988687, 0.022055399417877198, 0.022669466336568196, 0.025266234079996744, 0.02644663651784261, 0.02833951711654663, 0.029282585779825846, 0.030434497197469077, 0.03294238249460856, 0.03466973304748535, 0.03661234776178996, 0.036912802855173746, 0.04069151480992635, 0.041718300183614096, 0.042428048451741536, 0.04302623271942139, 0.044583197434743246, 0.046911966800689694, 0.046699217955271405, 0.049150514602661136, 0.04776343504587809, 0.050430885950724286, 0.050792801380157473, 0.05203771988550822, 0.052204068501790366, 0.053786913553873696, 0.054832367102305095, 0.05410906473795573, 0.057864550749460855, 0.05584733088811238, 0.05729531447092692, 0.059724736213684085, 0.06126451889673869, 0.06469676494598389, 0.06567511955897014, 0.06746026674906412, 0.06698643366495768, 0.06985018650690715, 0.07244595289230346, 0.07460856437683105, 0.07561168273289999, 0.07795756657918294, 0.081765349706014, 0.08465656836827597, 0.08766343196233113, 0.08939138253529867, 0.08828021685282389, 0.09118556578954061, 0.09453886349995931, 0.09350894689559937, 0.0966649850209554, 0.09663721720377604, 0.09978423515955608, 0.10069668292999268, 0.10199996630350748, 0.10317811965942383, 0.10176253318786621, 0.10266420046488443, 0.10130903323491415, 0.10295983552932739, 0.10303699970245361, 0.10493513345718383, 0.10273101727167765, 0.10513645013173421, 0.10475423336029052, 0.10552845001220704, 0.10532236496607462, 0.10633070071538289, 0.10562393267949423, 0.10550398429234822, 0.1059101661046346, 0.10680401722590128, 0.10759774843851726, 0.10778613090515136, 0.10709466934204101, 0.10816640059153239, 0.10805723269780478, 0.10882438023885091, 0.11059371630350749, 0.10912249883015951, 0.10881226857503255, 0.1114989161491394, 0.11176805098851522, 
0.1123900850613912, 0.10997803211212158, 0.11065438191095987, 0.11299500068028769, 0.11320026318232218, 0.11375606457392375, 0.11281930208206177, 0.11381973028182983, 0.11527889966964722]} -------------------------------------------------------------------------------- /results/montecarlo-test/20000: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.005602482954661051, 0.10702088673909506, 0.10762686729431152, 0.10877261559168498, 0.10692428350448609, 0.10675871769587199, 0.10666461785634358, 0.10651215314865112, 0.10668698151906332, 0.1067419171333313, 0.10654605229695638, 0.10686448017756144, 0.10673383076985678, 0.1068504810333252, 0.10677576859792073, 0.10671360095342, 0.10677743355433146, 0.1066752831141154, 0.10685458183288574, 0.10652406613032023, 0.10684365034103394, 0.10661096572875976, 0.10679380098978679, 0.10663220087687174, 0.10678840080897013, 0.10670115152994791, 0.10670146942138672, 0.10658344825108847, 0.10673201481501261, 0.10673024654388427, 0.10669146776199341, 0.10659158229827881, 0.10675458510716757, 0.10664823055267333, 0.10666311581929525, 0.10658564964930216, 0.1068199356396993, 0.10655190149943033, 0.10684444904327392, 0.10681933164596558, 0.10686525106430053, 0.10646644830703736, 0.10684728622436523, 0.10663631757100424, 0.10656126737594604, 0.10693110227584839, 0.10667825142542521, 0.10662916501363119, 0.10662733316421509, 0.10647566318511963, 0.10660746494928995, 0.10642793575922647, 0.1069010337193807, 0.1065973162651062, 0.10646353562672933, 0.10639238357543945, 0.10670384963353476, 0.1067011833190918, 0.10680590073267619, 0.10680570205052693, 0.10668499867121378, 0.10645153522491455, 0.10703353087107341, 0.1064434011777242, 0.10688236554463705, 0.10635916789372762, 0.1065511703491211, 0.10645091533660889, 0.1067488153775533, 0.10644726753234864, 0.10649498303731282, 0.1063265323638916, 0.10649574995040893, 0.10653640031814575, 0.10661853154500325, 0.10634613434473673, 0.10664843320846558, 0.1066061814626058, 0.10657664934794107, 0.10634906689325968, 0.10670586824417114, 0.10647981564203898, 0.1067773183186849, 0.10677968660990397, 0.10644606749216716, 0.10659241676330566, 0.10654869874318441, 0.1066875139872233, 0.10660140117009481, 0.10642688274383545, 0.10667548179626465, 0.10683121681213378, 0.10654943386713664, 0.10651703675587972, 0.10649903217951456, 0.10655324856440226, 0.10692453384399414, 0.1064140518506368, 0.10651665131251017, 0.10631646712621053, 0.10643826723098755], "mean_reward": [-115.9082949603627, -108.63765629936846, -103.84008600543216, -101.42812089824791, -96.954160152215493, -92.46079283365971, -91.25916163931538, -86.902692994501152, -81.485302159988493, -79.74985494170744, -78.688928819786213, -73.705786994413245, -72.638834050424478, -71.075154277978541, -66.775452762474657, -64.560057847757236, -61.724675601793827, -60.927678677978079, -59.144317007958946, -57.150601893182674, -54.249643171633437, -51.864704761810785, -51.271104428862031, -50.100744093439594, -46.878943919217271, -47.060797196082042, -44.382730986869348, -44.280891824128076, -42.079756689119428, -41.106851144538375, -39.804525046787148, -37.69439347628434, -36.632132967133252, -36.44255097637695, -35.19632419296677, -34.14782455324967, -33.504933743456021, -32.152573694475876, -30.59007755713116, -30.693204757735156, -28.879207935161475, -28.548194491314927, -27.464128564928682, -26.718844848575536, -26.167585221292171, -24.98503509979281, -24.577002257694055, -23.721540196357566, -22.975745040656836, 
-22.523903197627796, -22.20148313730542, -21.309556196796539, -20.898601056386273, -20.518020258026965, -20.074372452731897, -18.872198897380557, -18.424554829589589, -18.211112896336648, -17.772083265285438, -17.062392804130496, -17.046655189181351, -16.655669702369291, -16.107158203338471, -15.687184722973603, -15.272943963599641, -14.622511091840778, -14.434167688111257, -13.812629120558778, -13.868935690594313, -13.541693790562347, -13.105021830670358, -12.923508430511065, -12.588376204849796, -12.330517093939802, -11.965114382528206, -11.61410813753198, -11.611759435430269, -11.176070148873912, -11.054174693611316, -10.496839383410924, -10.428177673957242, -10.408037637362868, -10.269782162987154, -9.8242075589801097, -9.8330513264458883, -9.6844373318026964, -9.4665810364829444, -9.4211499602912472, -9.06211961509897, -9.0676005513776161, -9.0293867848637603, -8.5829514689372957, -8.3510275644148582, -8.398118294940133, -8.0328553944224215, -8.0487557848865663, -7.5676575028807846, -7.6966434430431789, -7.4806278201946306, -7.4840661003494624, -7.595012756775545], "learn_time": [0.0035566171010335285, 0.02346106767654419, 0.023077682654062907, 0.023221850395202637, 0.023133699099222818, 0.0228342334429423, 0.02313243548075358, 0.02440505027770996, 0.023030014832814534, 0.022844084103902183, 0.0233224352200826, 0.02246721585591634, 0.022877299785614015, 0.022581720352172853, 0.022725383440653484, 0.023676602045694987, 0.023230000336964925, 0.02297901709874471, 0.023134303092956544, 0.022793265183766682, 0.023204163710276286, 0.022629415988922118, 0.022826818625132243, 0.02291799783706665, 0.0234427014986674, 0.0229582150777181, 0.02297433614730835, 0.022919448216756184, 0.02284486691157023, 0.022851582368214926, 0.02319201628367106, 0.023145417372385662, 0.022727203369140626, 0.02306086619695028, 0.022704601287841797, 0.023037819067637126, 0.02289626598358154, 0.022653067111968996, 0.023109833399454754, 0.02294291655222575, 0.023100399971008302, 0.02303856611251831, 0.022874800364176433, 0.022681768735249838, 0.0229058305422465, 0.02297763427098592, 0.02301803429921468, 0.022862764199574788, 0.02307815154393514, 0.023258535067240398, 0.02376169761021932, 0.022658785184224445, 0.023115118344624836, 0.022646780808766684, 0.023070700963338218, 0.022842450936635336, 0.022983368237813315, 0.022867282231648762, 0.022809449831644693, 0.023228251934051515, 0.02263556718826294, 0.022468117872873943, 0.022805285453796387, 0.02285536527633667, 0.022587696711222332, 0.023129069805145265, 0.022825952370961508, 0.023276233673095705, 0.02269109884897868, 0.022942733764648438, 0.02262955109278361, 0.02340615193049113, 0.022934865951538087, 0.0229612668355306, 0.0231046994527181, 0.022603384653727212, 0.023302185535430908, 0.023103030522664388, 0.022922515869140625, 0.022990934054056802, 0.02298285166422526, 0.022735301653544107, 0.022839013735453287, 0.023086082935333253, 0.022982966899871827, 0.02305051883061727, 0.022678049405415852, 0.02274845043818156, 0.022831551233927407, 0.022923298676808677, 0.022923346360524496, 0.0227601687113444, 0.02299338181813558, 0.022843416531880698, 0.023112301031748453, 0.0225547194480896, 0.023006216684977213, 0.02327143351236979, 0.022732134660085043, 0.02276593049367269, 0.023019619782765708]} -------------------------------------------------------------------------------- /results/montecarlo-test/10xKL: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.005611999829610189, 0.10791378021240235, 
0.1081483523050944, 0.10804366668065389, 0.10794121821721395, 0.10773158470789591, 0.10790479977925618, 0.10828761657079061, 0.10800811847050985, 0.10792714754740397, 0.10784843365351358, 0.10778586864471436, 0.10803508361180623, 0.10793661673863729, 0.10781813065210978, 0.10814071496327718, 0.10823008219401041, 0.10769039789835612, 0.10779790083567302, 0.1080015500386556, 0.107914400100708, 0.10786636670430501, 0.1078243335088094, 0.10800034999847412, 0.10782891909281413, 0.10787026484807333, 0.10788633426030476, 0.10768091678619385, 0.1081145683924357, 0.10779448350270589, 0.10784161488215128, 0.10795806646347046, 0.1079790472984314, 0.10764508247375489, 0.10768166780471802, 0.10771633386611938, 0.10777931610743205, 0.10783750216166178, 0.10747776826222738, 0.10793730020523071, 0.10771868626276652, 0.10753993193308513, 0.10788094997406006, 0.10797418355941772, 0.1080299178759257, 0.10773530006408691, 0.10778028170267741, 0.1077899177869161, 0.10781391859054565, 0.10791800022125245, 0.1082468310991923, 0.10807941357294719, 0.10776326656341553, 0.10758853356043498, 0.10776204665501912, 0.10769833326339721, 0.10808896621068319, 0.10759381850560507, 0.10789166688919068, 0.10772635142008463, 0.10777463515599568, 0.10791978438695272, 0.10794968207677205, 0.1077868660291036, 0.10775751670201619, 0.10809413194656373, 0.10776884953180949, 0.10809576511383057, 0.10770313342412313, 0.10783933401107788, 0.10788679917653402, 0.1077742338180542, 0.10802658001581827, 0.10795743465423584, 0.1081066648165385, 0.10762664874394735, 0.10791861613591512, 0.10763848225275675, 0.10777299801508586, 0.10770920117696127, 0.10761161645253499, 0.10762766599655152, 0.10787145296732585, 0.10750203529993693, 0.10750243266423544, 0.10783986647923788, 0.10786886612574259, 0.10754956404368082, 0.10787503321965536, 0.10782260100046794, 0.10784540176391602, 0.10768771568934123, 0.10784411827723185, 0.10788189967473348, 0.10765291452407837, 0.10773361523946126, 0.10782150030136109, 0.10783846378326416, 0.10759923458099366, 0.10768835147221884, 0.10771890083948771], "mean_reward": [-103.0749652609114, -108.4594449345194, -98.151844602211938, -89.040262131582423, -78.075538172846279, -68.629831662556171, -62.669468302604237, -57.330844645757089, -50.773082475033902, -46.11478352097437, -40.464775570991819, -37.494976703717306, -33.785795425620499, -30.375023974178141, -28.819187179032042, -25.557388651117435, -23.947387977407075, -21.803354854441586, -19.749013117239791, -18.576815496833817, -16.92208563878512, -15.509378500655108, -14.629650417646621, -13.42929582104367, -12.539599452487053, -11.82880572091843, -10.820142151554069, -10.00470196703322, -9.5755168149811531, -9.2128776060249269, -8.6629132246675908, -7.9892335131467211, -7.2796654212275271, -7.1326724717251739, -6.8120797353014684, -6.8320114151412703, -6.6514917498929034, -6.3012555176492144, -6.098908986412769, -6.0161019615850977, -5.9765312000857787, -5.7886607726597461, -5.6043961961314261, -5.4529888893309559, -5.3349507363885955, -5.4132032549154534, -5.2128056110894132, -5.3175197080288577, -5.280536066955829, -5.0338820888107207, -5.3096626465092029, -5.0392145908587942, -5.0048487546860025, -5.1522745948647941, -5.2246390327606793, -5.2191911791297203, -4.9961217545788141, -4.7974552946948359, -4.9612296392194279, -4.9639209177590145, -5.0190138786592478, -4.7601231983426731, -4.8428854463901141, -4.9678309494734698, -4.9112253154636525, -4.7999361926104056, -4.9631484751709936, -4.7479352292175783, -4.7445593047341905, -4.7684721288534799, 
-4.7443386240336096, -4.7219422705618035, -4.7901960528673557, -4.7171040601293672, -4.972175647830186, -4.9454073078477547, -4.7191861097469312, -4.8507808561676677, -4.7499389774094327, -4.5110681126275383, -4.5963079366701036, -4.657552716590029, -4.7310771081205143, -4.6150913886096765, -4.8379033804580551, -4.5669404873023263, -4.7354110913842851, -4.6378495185980899, -4.5462521847556241, -4.590565751272484, -4.6039295257693915, -4.6336194103428774, -4.762026248614406, -4.5257944445041431, -4.4943329463469857, -4.5543503683494109, -4.6460681840416935, -4.7825872968734435, -4.549884411012342, -4.6681655832596123, -4.5553162531149241], "learn_time": [0.003663933277130127, 0.023195266723632812, 0.022860467433929443, 0.02322548230489095, 0.02295251687367757, 0.023057250181833903, 0.02268170118331909, 0.024271249771118164, 0.022600249449412028, 0.022869499524434407, 0.022858615716298422, 0.02264143228530884, 0.022750635941823322, 0.02257083257039388, 0.022768747806549073, 0.023286434014638265, 0.022876032193501792, 0.023490965366363525, 0.023026235898335776, 0.022497034072875975, 0.023082780838012695, 0.022798617680867512, 0.022720718383789064, 0.023294099171956382, 0.023191301027933757, 0.02270021438598633, 0.023056546847025555, 0.02253738244374593, 0.022597269217173258, 0.022565599282582602, 0.02327729860941569, 0.022858448823293052, 0.02284748156865438, 0.0231037974357605, 0.022850402196248374, 0.022855631510416665, 0.022981051603953043, 0.02323760191599528, 0.02267878452936808, 0.02275505065917969, 0.023022051652272543, 0.022357749938964843, 0.0232802152633667, 0.022687681516011558, 0.02316570281982422, 0.02265901565551758, 0.02270900011062622, 0.022494232654571532, 0.022788286209106445, 0.02277933359146118, 0.023413697878519695, 0.02285731633504232, 0.022854101657867432, 0.022884583473205565, 0.023433299859364827, 0.022651815414428712, 0.02288508415222168, 0.023146800200144448, 0.023244349161783855, 0.022688718636830647, 0.022864298025767008, 0.023054333527882893, 0.023172215620676676, 0.02287908395131429, 0.023082633813222248, 0.02349721590677897, 0.022746415932973225, 0.023139353593190512, 0.022859450181325278, 0.02292408545811971, 0.023124563694000243, 0.02341020107269287, 0.022819117705027262, 0.023149430751800537, 0.02310485045115153, 0.02282346487045288, 0.023140283425649007, 0.02303084929784139, 0.023247933387756346, 0.023076216379801433, 0.02315201759338379, 0.02296789884567261, 0.023386414845784506, 0.023200635115305582, 0.022711666425069173, 0.022792863845825195, 0.02263714869817098, 0.023138864835103353, 0.02311888535817464, 0.02310641606648763, 0.022878066698710123, 0.022971713542938234, 0.02303221623102824, 0.0227697491645813, 0.022849233945210774, 0.023032331466674806, 0.022785898049672446, 0.02283819913864136, 0.022729202111562093, 0.022920119762420654, 0.022697933514912925]} -------------------------------------------------------------------------------- /results/montecarlo-test/10000: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.00319594939549764, 0.0534944494565328, 0.05359761714935303, 0.05359506607055664, 0.0535229484240214, 0.053684667746225996, 0.05369894901911418, 0.0536532998085022, 0.05343286593755086, 0.05355259974797567, 0.053574581940968834, 0.05347606341044108, 0.053469085693359376, 0.05349798202514648, 0.0536345362663269, 0.05351308186848958, 0.053614517052968345, 0.053768964608510335, 0.05354615052541097, 0.053381017843882245, 0.05365394751230876, 0.05346973339716594, 0.053464134534200035, 
0.05354628562927246, 0.05373381773630778, 0.053491830825805664, 0.053523552417755124, 0.05345976750055949, 0.05353793303171794, 0.05345491568247477, 0.0534618337949117, 0.0536626656850179, 0.053341881434122725, 0.05380988518397013, 0.053562819957733154, 0.05362451473871867, 0.05342561403910319, 0.05381149848302205, 0.05352718432744344, 0.05345471700032552, 0.05344486633936564, 0.05381601651509603, 0.05343010028203329, 0.05349786678949992, 0.05349678595860799, 0.053480652968088786, 0.05348879893620809, 0.053616666793823244, 0.053446916739145915, 0.05354666312535604, 0.05360054969787598, 0.05357444683710734, 0.05353766679763794, 0.0535623828570048, 0.053381899992624916, 0.05377973318099975, 0.0534157673517863, 0.05350693464279175, 0.053352566560109456, 0.05351941585540772, 0.053561850388844805, 0.05349531571070353, 0.053398199876149494, 0.053532131512959796, 0.053555750846862794, 0.05361495018005371, 0.05338956514994304, 0.05343016783396403, 0.05357079903284709, 0.05339613358179728, 0.053472065925598146, 0.053439748287200925, 0.05341939926147461, 0.05348213116327922, 0.05354946851730347, 0.05341924826304118, 0.05341595013936361, 0.053488302230834964, 0.053328033288319907, 0.05332566897074382, 0.053196231524149575, 0.053574132919311526, 0.05325620174407959, 0.0533277153968811, 0.053449400266011554, 0.05350861549377441, 0.05342090129852295, 0.05334076484044393, 0.053376015027364096, 0.05357276995976766, 0.05322846968968709, 0.05342518488566081, 0.05359387000401815, 0.05349506537119548, 0.05345088243484497, 0.05331698258717855, 0.05325270096460978, 0.05340331792831421, 0.053353218237559, 0.053809316953023274, 0.05407668352127075], "mean_reward": [-118.26172388725725, -109.72417814900305, -106.94422103103751, -99.85412108809146, -101.43186504071366, -92.680734695151216, -91.500909997883824, -86.170180740847613, -84.365484356860719, -80.843819677349927, -79.516319716660732, -75.447232212191807, -73.046570915809809, -72.649078932185716, -68.309204252057711, -66.253430745263074, -64.979496883610295, -62.27987762733293, -60.301927047360458, -58.091987468148581, -57.494911152319382, -55.973703153556428, -54.633288312798513, -51.401653614090378, -50.345646336543084, -49.030445796136377, -46.429736807501286, -44.902125226281441, -44.91914744271368, -43.766726329280878, -42.145756538069165, -41.025773990092823, -39.000873647008866, -37.993775060299164, -38.297218491275352, -36.207316915565968, -34.886955332366902, -34.247033277384915, -33.955341250382489, -33.303488968303235, -31.764724185892081, -31.198227848839061, -30.211206063392815, -29.137194846509257, -28.578032217353005, -27.313194461945205, -26.460704960722715, -27.37352309345621, -25.797877313469421, -25.258593279973642, -23.965352753249928, -24.345749169351066, -22.898356233999685, -23.024189893053585, -22.000431420862, -21.468554573337553, -20.995921808503859, -20.521378996912507, -19.901590203571374, -19.646332631463764, -18.57428891393441, -18.843445691821255, -18.312248146595895, -17.620487084670941, -17.450684846922186, -16.837344921290789, -16.454490859258126, -16.615787636247202, -15.506002605674988, -15.613663586183163, -14.777118925359151, -14.611042844605176, -14.464639017272003, -13.67452111877747, -13.840099160162449, -13.626260434927165, -13.456937804117928, -13.009848739951536, -12.632661628725995, -12.633511680387697, -12.284454971073657, -11.758215776543066, -11.812986095784268, -11.450671687263103, -11.640326791509796, -10.96446911642639, -10.82359470727779, -10.562001478286597, -10.520647842046701, -10.253686431446516, 
-10.091636848433136, -9.9802600988549468, -9.8099252150185592, -9.6976116778031152, -9.6589645770352437, -9.2464152554764691, -8.9642190970239444, -8.869724855254864, -8.9030731402857004, -8.8130982766713757, -8.2914634723634553], "learn_time": [0.002913947900136312, 0.012615267435709636, 0.01241453488667806, 0.012194252014160157, 0.012472514311472576, 0.012198833624521892, 0.012316819032033284, 0.01258314847946167, 0.012369247277577718, 0.012079934279123941, 0.0121293306350708, 0.012237298488616943, 0.011998152732849121, 0.01227254867553711, 0.012405232588450114, 0.012390446662902833, 0.012358033657073974, 0.012416450182596843, 0.011823248863220216, 0.0122871994972229, 0.01197831630706787, 0.011980533599853516, 0.0122359832127889, 0.012375032901763916, 0.012173636754353841, 0.01224081516265869, 0.012319366137186686, 0.011994051933288574, 0.011976099014282227, 0.011985814571380616, 0.01213683287302653, 0.011889147758483886, 0.011929515997568767, 0.012150299549102784, 0.012127649784088135, 0.012458733717600505, 0.011987698078155518, 0.012280515829722087, 0.011915600299835205, 0.012241768836975097, 0.011980080604553222, 0.012403500080108643, 0.012089848518371582, 0.012503699461619059, 0.012221701939900716, 0.012136149406433105, 0.012105031808217367, 0.011943435668945313, 0.012369898955027263, 0.012144633134206136, 0.01197521686553955, 0.01236354907353719, 0.012085564931233724, 0.01234076420466105, 0.012062986691792807, 0.012153236071268718, 0.0122001846631368, 0.012237735589345296, 0.012156399091084798, 0.011881383260091145, 0.012114636103312175, 0.012238999207814535, 0.011951148509979248, 0.012038934230804443, 0.01221996545791626, 0.012029516696929931, 0.012290100256601969, 0.012063082059224446, 0.012189714113871257, 0.011960768699645996, 0.012097318967183432, 0.012262582778930664, 0.012262833118438721, 0.012604920069376628, 0.012105317910512288, 0.012461864948272705, 0.01215966542561849, 0.012051002184549967, 0.011921532948811849, 0.011717267831166585, 0.012138680617014567, 0.01168586810429891, 0.012116634845733642, 0.011973782380421957, 0.012001566092173259, 0.012131714820861816, 0.012015235424041749, 0.012060801188151041, 0.011857982476552327, 0.012279399236043294, 0.012037551403045655, 0.01198049783706665, 0.012107499440511068, 0.012189718087514241, 0.011944234371185303, 0.011964714527130127, 0.011757866541544596, 0.012046098709106445, 0.01209023396174113, 0.012212769190470377, 0.012197299798329671]} -------------------------------------------------------------------------------- /results/montecarlo-test/5000: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.0016684333483378093, 0.026654052734375, 0.02661968469619751, 0.02663265069325765, 0.026749169826507567, 0.026750548680623373, 0.026646367708841958, 0.026623499393463135, 0.026700480779012045, 0.02674483060836792, 0.026650647322336834, 0.026561466852823894, 0.026637816429138185, 0.026596399148305257, 0.026579085985819498, 0.026699817180633544, 0.026717952887217202, 0.026658280690511068, 0.02668838103612264, 0.026960317293802896, 0.026693316300710042, 0.02665331761042277, 0.026555315653483073, 0.026710601647694905, 0.02659551699956258, 0.02666421333948771, 0.02659260034561157, 0.026610851287841797, 0.026579848925272622, 0.0266226331392924, 0.026607199509938558, 0.026655733585357666, 0.02663716475168864, 0.02667694886525472, 0.02658458153406779, 0.026850736141204833, 0.0285703182220459, 0.02721023162206014, 0.02669078509012858, 0.026867616176605224, 0.026765052477518717, 
0.026610398292541505, 0.026703564325968425, 0.02668144702911377, 0.02665491501490275, 0.026624083518981934, 0.02675868272781372, 0.02657819986343384, 0.026641801993052164, 0.02659308115641276, 0.026681947708129882, 0.02659405072530111, 0.026668516794840495, 0.026589465141296387, 0.027224886417388915, 0.02664544979731242, 0.026794914404551187, 0.026610716183980306, 0.026772685845692954, 0.02661319971084595, 0.02663900057474772, 0.026610533396402996, 0.026703516642252605, 0.026588666439056396, 0.02663748264312744, 0.026586230595906576, 0.02670943339665731, 0.02664656639099121, 0.026680151621500652, 0.02652751604715983, 0.0267623504002889, 0.02661141554514567, 0.02661819855372111, 0.026598815123240152, 0.026600098609924315, 0.026724032560984292, 0.02663960059483846, 0.02663877010345459, 0.02668083111445109, 0.026659452915191652, 0.02657949924468994, 0.02652518351872762, 0.026620884736378986, 0.0266485333442688, 0.026645334561665852, 0.02663438320159912, 0.026576483249664308, 0.026530949274698894, 0.02675713300704956, 0.02669763167699178, 0.026573248704274497, 0.026580615838368734, 0.026484866937001545, 0.026648680369059246, 0.026510183016459146, 0.026588364442189535, 0.02674860159556071, 0.026592167218526204, 0.026617650190989176, 0.026712663968404136, 0.026801764965057373], "mean_reward": [-104.32904806340957, -101.36312241920432, -107.20200793818795, -99.532070448524067, -93.004518817039695, -93.335941092756954, -86.395899588032307, -88.929473018115019, -87.264000962372847, -83.572809600091148, -75.546403982606392, -79.31308954262407, -72.699244014070089, -72.861702504424258, -72.248325294128179, -69.942344635561909, -68.152265390915915, -64.645428390788368, -62.900987122974108, -60.797086879479721, -60.132372586445832, -59.415124479267675, -56.889791548249733, -54.710292313040078, -52.092694501343281, -51.861119416868114, -50.285518689577515, -49.734085375988691, -47.932171926772817, -48.213941542618585, -44.702212095550607, -42.920127253798704, -42.582294653795969, -43.502959906652094, -40.789826756836653, -40.244765902533352, -39.337988946765051, -38.685564047726473, -35.89926771372452, -35.336685986888043, -35.556243098004451, -34.530892931725042, -32.457782431159281, -32.843215207696431, -31.959369034309454, -30.925786303910627, -30.008821083441283, -29.783242670531646, -29.00076069686801, -27.742799008191032, -28.198901606276248, -27.442601857269647, -26.212720148284021, -26.391549324295891, -26.060244510556998, -24.667392512636354, -24.267016647731658, -23.781090393300005, -23.800103957605501, -22.636715389105756, -22.689721894395678, -22.446652598636192, -21.46177953251933, -21.030153018917279, -20.703587070843859, -20.842389686107115, -19.906516712580075, -19.861072163602358, -18.571718321345024, -18.959264249525287, -18.772520026624672, -18.279468659566803, -17.691545107938925, -17.05682743499424, -17.000137994453059, -17.141207944369153, -16.754559364022363, -15.876937109888932, -15.652462792678625, -15.224418731842986, -14.44296058401452, -14.600288422983098, -14.56848553569916, -14.346177575564575, -13.930521586640918, -13.758632701563693, -13.678193128539197, -13.801328636823373, -13.631743458207271, -13.119825382529431, -12.630637609028396, -12.397639226972704, -12.557244106149424, -12.556818074123711, -11.672840307105968, -12.023749466745103, -11.412678230496311, -12.063946686720714, -11.645646882198751, -11.479140866344689, -10.952200725464387], "learn_time": [0.0024944345156351724, 0.007172648111979167, 0.007029469807942708, 0.007040385405222575, 0.007186118761698405, 
0.007020501295725504, 0.007084167003631592, 0.007026882966359457, 0.0068160335222880045, 0.006996683279673259, 0.006969098250071208, 0.007049099604288737, 0.007200618584950765, 0.006807700792948405, 0.007128616174062093, 0.0070557832717895504, 0.007024399439493815, 0.007091299692789713, 0.007455364863077799, 0.006994513670603434, 0.00708536704381307, 0.007098964850107829, 0.006944402058919271, 0.007220236460367838, 0.0069591323534647625, 0.007020533084869385, 0.007104984919230143, 0.006999397277832031, 0.006911234060923258, 0.006963785489400228, 0.007164581616719564, 0.007338285446166992, 0.007361133893330892, 0.006868982315063476, 0.007030133406321208, 0.007537118593851725, 0.007341150442759196, 0.007593464851379394, 0.007557801405588786, 0.0069217642148335775, 0.007114632924397787, 0.007321683565775553, 0.0067373832066853845, 0.007033065954844157, 0.006999500592549642, 0.007301886876424153, 0.006977232297261556, 0.007003116607666016, 0.007321250438690185, 0.006972614924112956, 0.006948514779408773, 0.006892383098602295, 0.006990583737691244, 0.007128449281056722, 0.006793765226999918, 0.007078131039937337, 0.007044164339701334, 0.007013181845347087, 0.007315532366434733, 0.007260632514953613, 0.006939983367919922, 0.007428483168284098, 0.00706855058670044, 0.00730436642964681, 0.007287033398946126, 0.006902813911437988, 0.007170184453328451, 0.0068648338317871095, 0.00693665345509847, 0.007030316193898519, 0.007071236769358317, 0.007403632005055745, 0.006937499841054281, 0.007089932759602864, 0.007023648420969645, 0.006803333759307861, 0.007317769527435303, 0.007054968674977621, 0.007397480805714925, 0.007490801811218262, 0.007249848047892252, 0.0066962997118632, 0.0072697838147481285, 0.007059148947397868, 0.0073727846145629885, 0.007257417837778727, 0.007137648264567057, 0.0070536176363627115, 0.006978885332743327, 0.007127849260965983, 0.0069575667381286625, 0.006960916519165039, 0.0069640159606933595, 0.0074913342793782554, 0.007316033045450847, 0.0069484670956929525, 0.0070269187291463215, 0.006971617539723714, 0.0070740660031636555, 0.007044498125712077, 0.007150665918986002]} -------------------------------------------------------------------------------- /results/montecarlo-test/1000-short: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.0008156180381774902, 0.005562130610148112, 0.005531466007232666, 0.005496501922607422, 0.005493398507436117, 0.00552138090133667, 0.005513747533162435, 0.005497201283772787, 0.005467716852823893, 0.005506432056427002, 0.005511868000030518, 0.005494348208109538, 0.005471285184224447, 0.005494165420532227, 0.005537199974060059, 0.005481334527333578, 0.0055153171221415205, 0.005505998929341634, 0.005539099375406901, 0.0054952661196390785, 0.005485816796620687, 0.005503984292348226, 0.005482268333435058, 0.005537899335225424, 0.00551153024037679, 0.005556666851043701, 0.005530782540639241, 0.005507214864095052, 0.005545079708099365, 0.005469334125518799, 0.005532463391621907, 0.005525283018747966, 0.005561733245849609, 0.005505581696828206, 0.005478898684183757, 0.005538133780161539, 0.005594964822133382, 0.005495536327362061, 0.005488848686218262, 0.005482800801595052, 0.005504516760508219, 0.005520518620808919, 0.005498449007670085, 0.005501468976338704, 0.005534219741821289, 0.005485765139261882, 0.005457746982574463, 0.005547253290812174, 0.005521432558695475, 0.0054795185724894205, 0.005450582504272461, 0.00549236536026001, 0.0055358648300170895, 0.0055362502733866375, 
0.0055011351903279625, 0.005505065123240153, 0.005491582552591959, 0.005509332815806071, 0.005506483713785807, 0.005479184786478678, 0.0055065194765726725, 0.005486098925272623, 0.005505232016245524, 0.005535884698232015, 0.0055103341738382975, 0.005487465858459472, 0.005542683601379395, 0.005503650506337484, 0.0054909666379292805, 0.005510266621907552, 0.005465416113535563, 0.005506352583567301, 0.0055101315180460615, 0.005456666151682536, 0.005539317925771077, 0.005500217278798421, 0.005470450719197591, 0.005536949634552002, 0.005493466059366862, 0.005463782946268717, 0.0055422147115071615, 0.005514200528462728, 0.005520002047220866, 0.005514800548553467, 0.005494181315104167, 0.005501401424407959, 0.005504882335662842, 0.005464998881022135, 0.005564749240875244, 0.005495997269948324, 0.005504182974497477, 0.005520999431610107, 0.00551533301671346, 0.0055028001467386884, 0.005509066581726074, 0.005494153499603272, 0.005453383922576905, 0.005504119396209717, 0.005514836311340332, 0.00551761786142985, 0.005495901902516683], "mean_reward": [-104.53842159812547, -103.44509564780269, -99.408740960325275, -101.19500668722715, -102.77633856985048, -104.10942502999322, -102.2845933494016, -101.04112539309213, -88.411123892411155, -95.623677160853049, -92.112137398923579, -90.295894533921484, -92.525250753838279, -88.587805776005027, -85.147116532876566, -86.65983623891583, -89.210560413639925, -81.739684850842139, -78.0035245890883, -76.291511953847632, -78.785578349887487, -77.507455820646925, -76.950196936826018, -70.527231086836096, -76.773071685959067, -73.425871739001849, -69.661161275943797, -73.793305275195152, -71.510099703775765, -70.644196210962534, -63.899559364459591, -61.62490982214613, -66.721569403862233, -63.275151536618282, -58.085255206840657, -61.146613251335566, -66.030845550730021, -53.775038810760428, -57.556194620451663, -57.482062510145965, -58.410075616581949, -53.595472615683072, -55.474260522131736, -55.648480429414441, -55.227262295997107, -55.290623148687757, -55.551624116837715, -51.689281429578365, -55.062300623961697, -52.906069319402413, -48.159613383274547, -49.739695791126749, -50.598545767034565, -49.542576587569513, -49.00345353496742, -48.96443610934422, -45.655012874919237, -45.052941039418229, -44.90927595284915, -43.099075182530719, -43.145013457609039, -42.969348890052878, -40.100689831788884, -39.018571855421115, -40.57945211953195, -39.109930752852833, -38.501637975105439, -37.439807298372415, -37.819307661085553, -38.924643272733086, -37.679224060957438, -35.30440825194836, -33.739738040382875, -36.16495338349948, -35.324018809531019, -33.186214542229528, -34.289889401187374, -32.487841826227601, -33.170649454923918, -30.951939547357405, -33.896174067181519, -31.927721278569571, -31.515667184554978, -33.91835725567902, -30.365716480601755, -30.303327246337727, -30.2492102622892, -28.164972360590724, -31.475724470486277, -28.408836415822993, -29.536786815893826, -27.864809371395353, -27.861340087386246, -28.108174193259675, -26.839438891839325, -28.251098968592135, -28.467968137704741, -28.1611162481394, -26.983821086868318, -25.895958179027151, -25.435172027811042], "learn_time": [0.0019321839014689128, 0.0028971473375956217, 0.0024975021680196125, 0.002463817596435547, 0.0024835824966430663, 0.00236738125483195, 0.002518316109975179, 0.002464298407236735, 0.0024760842323303224, 0.0024768670399983725, 0.002518582344055176, 0.0024582306543986005, 0.002294035752614339, 0.0024286667505900067, 0.002450664838155111, 0.0024353186289469402, 
0.0025058984756469727, 0.002340535322825114, 0.002446480592091878, 0.002500184377034505, 0.002545448144276937, 0.002442618211110433, 0.0024314522743225098, 0.0025067488352457684, 0.002470115820566813, 0.002419400215148926, 0.0025069475173950194, 0.0025130311648050943, 0.002320114771525065, 0.0025757988293965656, 0.0024667660395304362, 0.0025052507718404133, 0.0024849335352579755, 0.0024759491284688314, 0.002603284517923991, 0.0024776657422383628, 0.002506518363952637, 0.00247648557027181, 0.002597665786743164, 0.0024928967157999676, 0.0024917999903361003, 0.002522619565327962, 0.0025107860565185547, 0.0024999499320983885, 0.0024923841158548993, 0.002814364433288574, 0.0028307318687438967, 0.0025284330050150553, 0.0024726510047912596, 0.0024886329968770346, 0.0025059819221496583, 0.002476998170216878, 0.002452266216278076, 0.0025826334953308106, 0.002426699797312419, 0.00244521697362264, 0.002402969201405843, 0.0024799505869547525, 0.002474331855773926, 0.0023896853129069012, 0.002480785051981608, 0.002863935629526774, 0.002762103080749512, 0.002495749791463216, 0.002823034922281901, 0.0024255832036336263, 0.002523183822631836, 0.0024682005246480306, 0.0022871692975362143, 0.002801398436228434, 0.002462434768676758, 0.002508068084716797, 0.002514914671579997, 0.0028383493423461913, 0.002539249261220296, 0.002588800589243571, 0.0024668176968892417, 0.002508250872294108, 0.002498634656270345, 0.00252078374226888, 0.0026612162590026857, 0.002442967891693115, 0.0024625698725382486, 0.0028454025586446127, 0.002316598097483317, 0.002514950434366862, 0.0024909496307373045, 0.002530519167582194, 0.002470584710439046, 0.0025010983149210612, 0.0024986823399861654, 0.002535732587178548, 0.0025189677874247235, 0.0024697025616963702, 0.0027054826418558757, 0.002344365914662679, 0.0026218493779500327, 0.0025406837463378905, 0.0024789015452067057, 0.0025286356608072917, 0.0028574347496032717]} -------------------------------------------------------------------------------- /rollouts.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import multiprocessing 4 | from utils import * 5 | import gym 6 | import time 7 | import copy 8 | from random import randint 9 | 10 | class Actor(multiprocessing.Process): 11 | def __init__(self, args, task_q, result_q, actor_id, monitor): 12 | multiprocessing.Process.__init__(self) 13 | self.task_q = task_q 14 | self.result_q = result_q 15 | self.args = args 16 | self.monitor = monitor 17 | 18 | 19 | def act(self, obs): 20 | obs = np.expand_dims(obs, 0) 21 | action_dist_mu, action_dist_logstd = self.session.run([self.action_dist_mu, self.action_dist_logstd], feed_dict={self.obs: obs}) 22 | # samples the guassian distribution 23 | act = action_dist_mu + np.exp(action_dist_logstd)*np.random.randn(*action_dist_logstd.shape) 24 | return act.ravel(), action_dist_mu, action_dist_logstd 25 | 26 | def run(self): 27 | 28 | self.env = gym.make(self.args.task) 29 | self.env.seed(randint(0,999999)) 30 | if self.monitor: 31 | self.env.monitor.start('monitor/', force=True) 32 | 33 | # tensorflow variables (same as in model.py) 34 | self.observation_size = self.env.observation_space.shape[0] 35 | self.action_size = np.prod(self.env.action_space.shape) 36 | self.hidden_size = 64 37 | weight_init = tf.random_uniform_initializer(-0.05, 0.05) 38 | bias_init = tf.constant_initializer(0) 39 | # tensorflow model of the policy 40 | self.obs = tf.placeholder(tf.float32, [None, self.observation_size]) 41 | 
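        # The actor rebuilds the learner's diagonal-Gaussian policy (same layout as
        # model.py): two 64-unit ReLU hidden layers map the observation to the action
        # mean, and the log standard deviation is a single learned parameter vector
        # tiled across the batch. act() above then samples actions as
        # mu + exp(logstd) * N(0, 1).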
self.debug = tf.constant([2,2]) 42 | with tf.variable_scope("policy-a"): 43 | h1 = fully_connected(self.obs, self.observation_size, self.hidden_size, weight_init, bias_init, "policy_h1") 44 | h1 = tf.nn.relu(h1) 45 | h2 = fully_connected(h1, self.hidden_size, self.hidden_size, weight_init, bias_init, "policy_h2") 46 | h2 = tf.nn.relu(h2) 47 | h3 = fully_connected(h2, self.hidden_size, self.action_size, weight_init, bias_init, "policy_h3") 48 | action_dist_logstd_param = tf.Variable((.01*np.random.randn(1, self.action_size)).astype(np.float32), name="policy_logstd") 49 | self.action_dist_mu = h3 50 | self.action_dist_logstd = tf.tile(action_dist_logstd_param, tf.pack((tf.shape(self.action_dist_mu)[0], 1))) 51 | 52 | config = tf.ConfigProto( 53 | device_count = {'GPU': 0} 54 | ) 55 | self.session = tf.Session(config=config) 56 | self.session.run(tf.initialize_all_variables()) 57 | var_list = tf.trainable_variables() 58 | 59 | self.set_policy = SetPolicyWeights(self.session, var_list) 60 | 61 | while True: 62 | # get a task, or wait until it gets one 63 | next_task = self.task_q.get(block=True) 64 | if next_task == 1: 65 | # the task is an actor request to collect experience 66 | path = self.rollout() 67 | self.task_q.task_done() 68 | self.result_q.put(path) 69 | elif next_task == 2: 70 | print "kill message" 71 | if self.monitor: 72 | self.env.monitor.close() 73 | self.task_q.task_done() 74 | break 75 | else: 76 | # the task is to set parameters of the actor policy 77 | self.set_policy(next_task) 78 | # super hacky method to make sure when we fill the queue with set parameter tasks, 79 | # an actor doesn't finish updating before the other actors can accept their own tasks. 80 | time.sleep(0.1) 81 | self.task_q.task_done() 82 | return 83 | 84 | def rollout(self): 85 | obs, actions, rewards, action_dists_mu, action_dists_logstd = [], [], [], [], [] 86 | ob = filter(self.env.reset()) 87 | for i in xrange(self.args.max_pathlength - 1): 88 | obs.append(ob) 89 | action, action_dist_mu, action_dist_logstd = self.act(ob) 90 | actions.append(action) 91 | action_dists_mu.append(action_dist_mu) 92 | action_dists_logstd.append(action_dist_logstd) 93 | res = self.env.step(action) 94 | ob = filter(res[0]) 95 | rewards.append((res[1])) 96 | if res[2] or i == self.args.max_pathlength - 2: 97 | path = {"obs": np.concatenate(np.expand_dims(obs, 0)), 98 | "action_dists_mu": np.concatenate(action_dists_mu), 99 | "action_dists_logstd": np.concatenate(action_dists_logstd), 100 | "rewards": np.array(rewards), 101 | "actions": np.array(actions)} 102 | return path 103 | break 104 | 105 | class ParallelRollout(): 106 | def __init__(self, args): 107 | self.args = args 108 | 109 | self.tasks = multiprocessing.JoinableQueue() 110 | self.results = multiprocessing.Queue() 111 | 112 | self.actors = [] 113 | self.actors.append(Actor(self.args, self.tasks, self.results, 9999, args.monitor)) 114 | 115 | for i in xrange(self.args.num_threads-1): 116 | self.actors.append(Actor(self.args, self.tasks, self.results, 37*(i+3), False)) 117 | 118 | for a in self.actors: 119 | a.start() 120 | 121 | # we will start by running 20,000 / 1000 = 20 episodes for the first ieration 122 | 123 | self.average_timesteps_in_episode = 1000 124 | 125 | def rollout(self): 126 | 127 | # keep 20,000 timesteps per update 128 | num_rollouts = self.args.timesteps_per_batch / self.average_timesteps_in_episode 129 | 130 | for i in xrange(num_rollouts): 131 | self.tasks.put(1) 132 | 133 | self.tasks.join() 134 | 135 | paths = [] 136 | while num_rollouts: 
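# every completed rollout task has left one trajectory on the result queue; collect them all,
# then update the average episode length so the next call again requests roughly
# args.timesteps_per_batch timesteps worth of episodes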
137 | num_rollouts -= 1 138 | paths.append(self.results.get()) 139 | 140 | self.average_timesteps_in_episode = sum([len(path["rewards"]) for path in paths]) / len(paths) 141 | return paths 142 | 143 | def set_policy_weights(self, parameters): 144 | for i in xrange(self.args.num_threads): 145 | self.tasks.put(parameters) 146 | self.tasks.join() 147 | 148 | def end(self): 149 | for i in xrange(self.args.num_threads): 150 | self.tasks.put(2) 151 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import scipy.signal 4 | 5 | # KL divergence with itself, holding first argument fixed 6 | def gauss_selfKL_firstfixed(mu, logstd): 7 | mu1, logstd1 = map(tf.stop_gradient, [mu, logstd]) 8 | mu2, logstd2 = mu, logstd 9 | 10 | return gauss_KL(mu1, logstd1, mu2, logstd2) 11 | 12 | # probability to take action x, given paramaterized guassian distribution 13 | def gauss_log_prob(mu, logstd, x): 14 | var = tf.exp(2*logstd) 15 | gp = -tf.square(x - mu)/(2*var) - .5*tf.log(tf.constant(2*np.pi)) - logstd 16 | return tf.reduce_sum(gp, [1]) 17 | 18 | # KL divergence between two paramaterized guassian distributions 19 | def gauss_KL(mu1, logstd1, mu2, logstd2): 20 | var1 = tf.exp(2*logstd1) 21 | var2 = tf.exp(2*logstd2) 22 | 23 | kl = tf.reduce_sum(logstd2 - logstd1 + (var1 + tf.square(mu1 - mu2))/(2*var2) - 0.5) 24 | return kl 25 | 26 | # Shannon entropy for a paramaterized guassian distributions 27 | def gauss_ent(mu, logstd): 28 | h = tf.reduce_sum(logstd + tf.constant(0.5*np.log(2*np.pi*np.e), tf.float32)) 29 | return h 30 | 31 | def discount(x, gamma): 32 | assert x.ndim >= 1 33 | return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1] 34 | 35 | def cat_sample(prob_nk): 36 | assert prob_nk.ndim == 2 37 | # prob_nk: batchsize x actions 38 | N = prob_nk.shape[0] 39 | csprob_nk = np.cumsum(prob_nk, axis=1) 40 | out = np.zeros(N, dtype='i') 41 | for (n, csprob_k, r) in zip(xrange(N), csprob_nk, np.random.rand(N)): 42 | for (k, csprob) in enumerate(csprob_k): 43 | if csprob > r: 44 | out[n] = k 45 | break 46 | return out 47 | 48 | def slice_2d(x, inds0, inds1): 49 | inds0 = tf.cast(inds0, tf.int64) 50 | inds1 = tf.cast(inds1, tf.int64) 51 | shape = tf.cast(tf.shape(x), tf.int64) 52 | ncols = shape[1] 53 | x_flat = tf.reshape(x, [-1]) 54 | return tf.gather(x_flat, inds0 * ncols + inds1) 55 | 56 | def var_shape(x): 57 | out = [k.value for k in x.get_shape()] 58 | assert all(isinstance(a, int) for a in out), \ 59 | "shape function assumes that shape is fully known" 60 | return out 61 | 62 | class Filter: 63 | def __init__(self, filter_mean=True): 64 | self.m1 = 0 65 | self.v = 0 66 | self.n = 0. 
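# Filter keeps running estimates of the observation mean (m1) and variance (v);
# __call__ standardizes each incoming observation with them (subtracting the mean
# only when filter_mean is True) and clips the result to the range [-10, 10].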
67 | self.filter_mean = filter_mean 68 | 69 | def __call__(self, o): 70 | self.m1 = self.m1 * (self.n / (self.n + 1)) + o * 1/(1 + self.n) 71 | self.v = self.v * (self.n / (self.n + 1)) + (o - self.m1)**2 * 1/(1 + self.n) 72 | self.std = (self.v + 1e-6)**.5 # std 73 | self.n += 1 74 | if self.filter_mean: 75 | o1 = (o - self.m1)/self.std 76 | else: 77 | o1 = o/self.std 78 | o1 = (o1 > 10) * 10 + (o1 < -10)* (-10) + (o1 < 10) * (o1 > -10) * o1 79 | return o1 80 | filter = Filter() 81 | filter_std = Filter() 82 | 83 | def numel(x): 84 | return np.prod(var_shape(x)) 85 | 86 | def flatgrad(loss, var_list): 87 | grads = tf.gradients(loss, var_list) 88 | return tf.concat(0, [tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)]) 89 | 90 | def conjugate_gradient(f_Ax, b, cg_iters=10, residual_tol=1e-10): 91 | # in numpy 92 | p = b.copy() 93 | r = b.copy() 94 | x = np.zeros_like(b) 95 | rdotr = r.dot(r) 96 | for i in xrange(cg_iters): 97 | z = f_Ax(p) 98 | v = rdotr / p.dot(z) 99 | x += v * p 100 | r -= v * z 101 | newrdotr = r.dot(r) 102 | mu = newrdotr / rdotr 103 | p = r + mu * p 104 | rdotr = newrdotr 105 | if rdotr < residual_tol: 106 | break 107 | return x 108 | 109 | def linesearch(f, x, fullstep, expected_improve_rate): 110 | accept_ratio = .1 111 | max_backtracks = 10 112 | fval = f(x) 113 | for (_n_backtracks, stepfrac) in enumerate(.5**np.arange(max_backtracks)): 114 | xnew = x + stepfrac * fullstep 115 | newfval = f(xnew) 116 | actual_improve = fval - newfval 117 | expected_improve = expected_improve_rate * stepfrac 118 | ratio = actual_improve / expected_improve 119 | if ratio > accept_ratio and actual_improve > 0: 120 | return xnew 121 | return x 122 | 123 | class SetFromFlat(object): 124 | 125 | def __init__(self, session, var_list): 126 | self.session = session 127 | assigns = [] 128 | shapes = map(var_shape, var_list) 129 | total_size = sum(np.prod(shape) for shape in shapes) 130 | self.theta = theta = tf.placeholder(tf.float32, [total_size]) 131 | start = 0 132 | assigns = [] 133 | for (shape, v) in zip(shapes, var_list): 134 | size = np.prod(shape) 135 | assigns.append(tf.assign(v,tf.reshape(theta[start:start + size],shape))) 136 | start += size 137 | self.op = tf.group(*assigns) 138 | 139 | def __call__(self, theta): 140 | self.session.run(self.op, feed_dict={self.theta: theta}) 141 | 142 | class GetFlat(object): 143 | 144 | def __init__(self, session, var_list): 145 | self.session = session 146 | self.op = tf.concat(0, [tf.reshape(v, [numel(v)]) for v in var_list]) 147 | 148 | def __call__(self): 149 | return self.op.eval(session=self.session) 150 | 151 | class GetPolicyWeights(object): 152 | def __init__(self, session, var_list): 153 | self.session = session 154 | self.op = [var for var in var_list if 'policy' in var.name] 155 | def __call__(self): 156 | return self.session.run(self.op) 157 | 158 | class SetPolicyWeights(object): 159 | def __init__(self, session, var_list): 160 | self.session = session 161 | self.policy_vars = [var for var in var_list if 'policy' in var.name] 162 | self.placeholders = {} 163 | self.assigns = [] 164 | for var in self.policy_vars: 165 | self.placeholders[var.name] = tf.placeholder(tf.float32, var.get_shape()) 166 | self.assigns.append(tf.assign(var,self.placeholders[var.name])) 167 | def __call__(self, weights): 168 | feed_dict = {} 169 | count = 0 170 | for var in self.policy_vars: 171 | feed_dict[self.placeholders[var.name]] = weights[count] 172 | count += 1 173 | self.session.run(self.assigns, feed_dict) 174 | 175 | def 
xavier_initializer(self, shape): 176 | dim_sum = np.sum(shape) 177 | if len(shape) == 1: 178 | dim_sum += 1 179 | bound = np.sqrt(6.0 / dim_sum) 180 | return tf.random_uniform(shape, minval=-bound, maxval=bound) 181 | 182 | def fully_connected(input_layer, input_size, output_size, weight_init, bias_init, scope): 183 | with tf.variable_scope(scope): 184 | w = tf.get_variable("w", [input_size, output_size], initializer=weight_init) 185 | # w = tf.Variable(xavier_initializer([input_size, output_size]), name="w") 186 | b = tf.get_variable("b", [output_size], initializer=bias_init) 187 | return tf.matmul(input_layer,w) + b 188 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import gym 4 | from utils import * 5 | from rollouts import * 6 | from value_function import * 7 | import time 8 | import os 9 | import logging 10 | import random 11 | import multiprocessing 12 | 13 | class TRPO(multiprocessing.Process): 14 | def __init__(self, args, observation_space, action_space, task_q, result_q): 15 | multiprocessing.Process.__init__(self) 16 | self.task_q = task_q 17 | self.result_q = result_q 18 | self.observation_space = observation_space 19 | self.action_space = action_space 20 | self.args = args 21 | 22 | def makeModel(self): 23 | self.observation_size = self.observation_space.shape[0] 24 | self.action_size = np.prod(self.action_space.shape) 25 | self.hidden_size = 64 26 | 27 | weight_init = tf.random_uniform_initializer(-0.05, 0.05) 28 | bias_init = tf.constant_initializer(0) 29 | 30 | config = tf.ConfigProto( 31 | device_count = {'GPU': 0} 32 | ) 33 | self.session = tf.Session(config=config) 34 | 35 | self.obs = tf.placeholder(tf.float32, [None, self.observation_size]) 36 | self.action = tf.placeholder(tf.float32, [None, self.action_size]) 37 | self.advantage = tf.placeholder(tf.float32, [None]) 38 | self.oldaction_dist_mu = tf.placeholder(tf.float32, [None, self.action_size]) 39 | self.oldaction_dist_logstd = tf.placeholder(tf.float32, [None, self.action_size]) 40 | 41 | with tf.variable_scope("policy"): 42 | h1 = fully_connected(self.obs, self.observation_size, self.hidden_size, weight_init, bias_init, "policy_h1") 43 | h1 = tf.nn.relu(h1) 44 | h2 = fully_connected(h1, self.hidden_size, self.hidden_size, weight_init, bias_init, "policy_h2") 45 | h2 = tf.nn.relu(h2) 46 | h3 = fully_connected(h2, self.hidden_size, self.action_size, weight_init, bias_init, "policy_h3") 47 | action_dist_logstd_param = tf.Variable((.01*np.random.randn(1, self.action_size)).astype(np.float32), name="policy_logstd") 48 | # means for each action 49 | self.action_dist_mu = h3 50 | # log standard deviations for each actions 51 | self.action_dist_logstd = tf.tile(action_dist_logstd_param, tf.pack((tf.shape(self.action_dist_mu)[0], 1))) 52 | 53 | batch_size = tf.shape(self.obs)[0] 54 | # what are the probabilities of taking self.action, given new and old distributions 55 | log_p_n = gauss_log_prob(self.action_dist_mu, self.action_dist_logstd, self.action) 56 | log_oldp_n = gauss_log_prob(self.oldaction_dist_mu, self.oldaction_dist_logstd, self.action) 57 | 58 | # tf.exp(log_p_n) / tf.exp(log_oldp_n) 59 | ratio = tf.exp(log_p_n - log_oldp_n) 60 | # importance sampling of surrogate loss (L in paper) 61 | surr = -tf.reduce_mean(ratio * self.advantage) 62 | var_list = tf.trainable_variables() 63 | 64 | eps = 1e-8 65 | batch_size_float = 
tf.cast(batch_size, tf.float32) 66 | # kl divergence and shannon entropy 67 | kl = gauss_KL(self.oldaction_dist_mu, self.oldaction_dist_logstd, self.action_dist_mu, self.action_dist_logstd) / batch_size_float 68 | ent = gauss_ent(self.action_dist_mu, self.action_dist_logstd) / batch_size_float 69 | 70 | self.losses = [surr, kl, ent] 71 | # policy gradient 72 | self.pg = flatgrad(surr, var_list) 73 | 74 | # KL divergence w/ itself, with first argument kept constant. 75 | kl_firstfixed = gauss_selfKL_firstfixed(self.action_dist_mu, self.action_dist_logstd) / batch_size_float 76 | # gradient of KL w/ itself 77 | grads = tf.gradients(kl_firstfixed, var_list) 78 | # what vector we're multiplying by 79 | self.flat_tangent = tf.placeholder(tf.float32, [None]) 80 | shapes = map(var_shape, var_list) 81 | start = 0 82 | tangents = [] 83 | for shape in shapes: 84 | size = np.prod(shape) 85 | param = tf.reshape(self.flat_tangent[start:(start + size)], shape) 86 | tangents.append(param) 87 | start += size 88 | # gradient of KL w/ itself * tangent 89 | gvp = [tf.reduce_sum(g * t) for (g, t) in zip(grads, tangents)] 90 | # 2nd gradient of KL w/ itself * tangent 91 | self.fvp = flatgrad(gvp, var_list) 92 | # the actual parameter values 93 | self.gf = GetFlat(self.session, var_list) 94 | # call this to set parameter values 95 | self.sff = SetFromFlat(self.session, var_list) 96 | self.session.run(tf.initialize_all_variables()) 97 | # value function 98 | # self.vf = VF(self.session) 99 | self.vf = LinearVF() 100 | 101 | self.get_policy = GetPolicyWeights(self.session, var_list) 102 | 103 | def run(self): 104 | self.makeModel() 105 | while True: 106 | paths = self.task_q.get() 107 | if paths is None: 108 | # kill the learner 109 | self.task_q.task_done() 110 | break 111 | elif paths == 1: 112 | # just get params, no learn 113 | self.task_q.task_done() 114 | self.result_q.put(self.get_policy()) 115 | elif paths[0] == 2: 116 | # adjusting the max KL. 117 | self.args.max_kl = paths[1] 118 | self.task_q.task_done() 119 | else: 120 | mean_reward = self.learn(paths) 121 | self.task_q.task_done() 122 | self.result_q.put((self.get_policy(), mean_reward)) 123 | return 124 | 125 | def learn(self, paths): 126 | 127 | # is it possible to replace A(s,a) with Q(s,a)? 
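# Monte Carlo estimate of the advantage at every timestep of every path:
# returns_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ...  (computed by discount()),
# baseline_t = value-function prediction for state s_t,
# advantage_t = returns_t - baseline_t.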
128 | for path in paths: 129 | path["baseline"] = self.vf.predict(path) 130 | path["returns"] = discount(path["rewards"], self.args.gamma) 131 | path["advantage"] = path["returns"] - path["baseline"] 132 | # path["advantage"] = path["returns"] 133 | 134 | # puts all the experiences in a matrix: total_timesteps x (observation/action dimensions) 135 | action_dist_mu = np.concatenate([path["action_dists_mu"] for path in paths]) 136 | action_dist_logstd = np.concatenate([path["action_dists_logstd"] for path in paths]) 137 | obs_n = np.concatenate([path["obs"] for path in paths]) 138 | action_n = np.concatenate([path["actions"] for path in paths]) 139 | 140 | # standardize advantages to mean 0, stddev 1 141 | advant_n = np.concatenate([path["advantage"] for path in paths]) 142 | advant_n -= advant_n.mean() 143 | advant_n /= (advant_n.std() + 1e-8) 144 | 145 | # train value function / baseline on rollout paths 146 | self.vf.fit(paths) 147 | 148 | feed_dict = {self.obs: obs_n, self.action: action_n, self.advantage: advant_n, self.oldaction_dist_mu: action_dist_mu, self.oldaction_dist_logstd: action_dist_logstd} 149 | 150 | # current (flattened) policy parameters 151 | thprev = self.gf() 152 | 153 | # computes the fisher-vector product F * p (plus conjugate-gradient damping) 154 | def fisher_vector_product(p): 155 | feed_dict[self.flat_tangent] = p 156 | return self.session.run(self.fvp, feed_dict) + p * self.args.cg_damping 157 | 158 | g = self.session.run(self.pg, feed_dict) 159 | 160 | # solve F * x = -g, where F is the Fisher information matrix and g is the gradient of the surrogate loss 161 | # stepdir = x = F_inverse * (-g) 162 | stepdir = conjugate_gradient(fisher_vector_product, -g) 163 | 164 | # stepdir is the direction in which theta will change 165 | # the KL divergence is approximated by 0.5 * stepdir_transpose * [Fisher Information Matrix] * stepdir, 166 | # where the [Fisher Information Matrix] acts like a metric: 167 | # ([Fisher Information Matrix] * stepdir) is computed using the function above, 168 | # and then stepdir * [above] is computed manually.
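# shs = 0.5 * stepdir^T * F * stepdir, the quadratic KL estimate for a full step along stepdir.
# Scaling the step by 1 / lm with lm = sqrt(shs / max_kl) makes that estimate hit the trust-region bound:
# 0.5 * (stepdir/lm)^T * F * (stepdir/lm) = shs / lm^2 = max_kl.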
169 | shs = 0.5 * stepdir.dot(fisher_vector_product(stepdir)) 170 | 171 | lm = np.sqrt(shs / self.args.max_kl) 172 | # if self.args.max_kl > 0.001: 173 | # self.args.max_kl *= self.args.kl_anneal 174 | 175 | fullstep = stepdir / lm 176 | negative_g_dot_steppdir = -g.dot(stepdir) 177 | 178 | def loss(th): 179 | self.sff(th) 180 | # surrogate loss: policy gradient loss 181 | return self.session.run(self.losses[0], feed_dict) 182 | 183 | # finds best parameter by starting with a big step and working backwards 184 | theta = linesearch(loss, thprev, fullstep, negative_g_dot_steppdir/ lm) 185 | # i guess we just take a fullstep no matter what 186 | theta = thprev + fullstep 187 | self.sff(theta) 188 | 189 | surrogate_after, kl_after, entropy_after = self.session.run(self.losses,feed_dict) 190 | 191 | episoderewards = np.array( 192 | [path["rewards"].sum() for path in paths]) 193 | stats = {} 194 | stats["Average sum of rewards per episode"] = episoderewards.mean() 195 | stats["Entropy"] = entropy_after 196 | stats["max KL"] = self.args.max_kl 197 | stats["Timesteps"] = sum([len(path["rewards"]) for path in paths]) 198 | # stats["Time elapsed"] = "%.2f mins" % ((time.time() - start_time) / 60.0) 199 | stats["KL between old and new distribution"] = kl_after 200 | stats["Surrogate loss"] = surrogate_after 201 | # print ("\n********** Iteration {} ************".format(i)) 202 | for k, v in stats.iteritems(): 203 | print(k + ": " + " " * (40 - len(k)) + str(v)) 204 | 205 | return stats["Average sum of rewards per episode"] 206 | -------------------------------------------------------------------------------- /results/try2/Hopper-v1-1: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.30063623587290444, 0.37494579950968426, 0.32508103450139364, 0.35154316822687787, 0.3622896154721578, 0.3438296993573507, 0.3592551350593567, 0.3722078005472819, 0.33836597998936974, 0.3789469003677368, 0.3572318514188131, 0.3327756643295288, 0.34034845034281414, 0.3336820681889852, 0.31586645046869916, 0.3414546171824137, 0.3080893317858378, 0.325428851445516, 0.3224537173906962, 0.30918596585591634, 0.32015971740086874, 0.3136422157287598, 0.3061378836631775, 0.310508398214976, 0.31170299847920735, 0.31126903295516967, 0.312106716632843, 0.30292566617329914, 0.3094649990399679, 0.3131647348403931, 0.3123855988184611, 0.3079931338628133, 0.31671658356984456, 0.3143402139345805, 0.31956981817881264, 0.3120567321777344, 0.3106871485710144, 0.31956966320673624, 0.30374913215637206, 0.31597196658452353, 0.3022311687469482, 0.30932273467381793, 0.31710753043492634, 0.30370693604151405, 0.3000874479611715, 0.3153889020284017, 0.3113625844319661, 0.2991531491279602, 0.31339490016301474, 0.3096402327219645, 0.2936861832936605, 0.2949830532073975, 0.31991100311279297, 0.30816136598587035, 0.29824040333429974, 0.2830488006273905, 0.31725458304087323, 0.30535066525141397, 0.2925266186396281, 0.300136399269104, 0.2969443837801615, 0.29804973204930624, 0.2935112476348877, 0.30172935326894124, 0.293500018119812, 0.30222684939702354, 0.2911620497703552, 0.307574196656545, 0.30009806950887047, 0.289194667339325, 0.300824236869812, 0.29222440322240195, 0.29383346637090046, 0.3024326841036479, 0.2984249830245972, 0.2996291478474935, 0.2951438864072164, 0.3002764503161112, 0.2744378169377645, 0.31081976493199664, 0.29466086626052856, 0.29846441745758057, 0.294405202070872, 0.2959420005480448, 0.29789905150731405, 0.2895543495814005, 0.2956388870875041, 0.28583231767018635, 
0.3142280658086141, 0.28896660010019937, 0.2948844830195109, 0.30864350001017254, 0.2881186683972677, 0.29170869986216225, 0.304354198773702, 0.2844219168027242, 0.30090826749801636, 0.30270386934280397, 0.2880360007286072, 0.29576593240102134, 0.30997241735458375, 0.2938820997873942, 0.2738078514734904, 0.31211034854253134, 0.294757616519928, 0.2923846165339152, 0.2977492650349935, 0.2987591822942098, 0.30016581614812216, 0.28952961365381874, 0.2968270977338155, 0.295500648021698, 0.2893045663833618, 0.3041246016820272, 0.2946155826250712, 0.2925492326418559, 0.2917799154917399, 0.29439268112182615, 0.29849373499552406, 0.2992759863535563, 0.28955461581548053, 0.2990487138430277, 0.2837883472442627, 0.3058985153834025, 0.3022645672162374, 0.29962926705678306, 0.29248186747233074, 0.28902990023295083, 0.2934601982434591, 0.2931838512420654, 0.3016454497973124, 0.29427774747212726, 0.278648583094279, 0.3008786996205648, 0.29430480003356935, 0.29618540207544963, 0.2973829666773478, 0.30847763220469154, 0.2911327521006266, 0.282852832476298, 0.2984654466311137, 0.2942920645078023, 0.3091975649197896, 0.2855167826016744, 0.28337726593017576, 0.2951552510261536, 0.3156848986943563, 0.2741715987523397, 0.30252693096796673, 0.2929545799891154, 0.2945961157480876, 0.2852358341217041, 0.2895275155703227, 0.310779337088267, 0.29433706601460774, 0.28179453214009603, 0.29873038132985436, 0.29751311937967934, 0.2970436175664266, 0.2939653992652893, 0.2929718017578125, 0.28908284902572634, 0.30649763345718384, 0.28389108180999756, 0.2887634515762329, 0.29887261788050334, 0.3042831818262736, 0.28823692003885903, 0.30618534882863363, 0.30412354866663616, 0.2740340312321981, 0.29861721595128377, 0.2971414844195048, 0.28839759826660155, 0.3052070657412211, 0.2830577333768209, 0.2976803501447042, 0.274962317943573, 0.313663117090861, 0.29493961731592816, 0.29585668245951335, 0.2803660313288371, 0.30142911672592165], "mean_reward": [14.851614620108057, 18.267765177407377, 19.491560679376452, 22.364979218872236, 29.086984664644664, 32.965202833865746, 40.700834975229647, 52.993974146535621, 59.082324630729651, 80.233025198545761, 98.635729932028312, 107.98667323022482, 124.50534171675382, 136.48663878218639, 145.23519506136941, 167.63045376064622, 168.13832303527241, 181.44675194331236, 195.1950663329427, 198.4523951057167, 205.38493428283576, 214.43923081866311, 214.10618138653629, 217.38155119523364, 223.18231152848415, 223.09915518642043, 230.86966939659061, 229.7194940238054, 232.94886956946542, 240.01680597617019, 248.51629137858046, 254.49463233701221, 267.85769578288733, 279.82012852058352, 298.48220259223996, 311.76413368475806, 322.34218490806654, 347.0430541801839, 349.86188472323926, 366.05196984594659, 372.03380376178967, 390.32124129784881, 414.83690686196041, 424.6955489030168, 425.85677959177656, 448.4612857121935, 470.64590503509771, 468.4107497188898, 494.87973171677089, 518.12928972001532, 499.34244510222686, 498.57874752405507, 537.36110918499855, 557.98419666799077, 553.20231893715288, 521.13010978131103, 563.57945660793007, 572.29619706350297, 568.20732162312834, 576.8643931261181, 583.21711214784466, 581.84624517079772, 583.39772952988505, 599.17242632504281, 592.02254864907877, 605.62652094723717, 598.17264339278222, 625.04861181007163, 630.9412305978625, 621.70495537050533, 640.00414254119494, 635.48016697266507, 623.44593562659372, 641.51408163594363, 645.24982383707936, 660.47363891888654, 657.60180119525091, 665.32710907875821, 623.52518700502537, 657.47605710903883, 
657.93669956576639, 662.51390108304997, 664.4404555708096, 668.05904776096259, 677.71647993371869, 661.5818556192487, 660.64954613764382, 636.31721395703903, 679.24435297575349, 673.09915663217123, 668.72224900284186, 707.75414649663264, 687.98251451839644, 684.5172458837975, 702.98430764658735, 677.43074413742318, 697.16092940438728, 719.33221734493713, 701.36421122893705, 703.82701287902864, 739.54000597597599, 734.50414542322301, 676.60571558341735, 727.22595677051868, 728.55376748468257, 720.70812393335314, 728.68775842072034, 737.72070632675309, 753.48010035990319, 743.99132036224125, 747.14338781619938, 753.82034768203869, 747.20826740462985, 771.93095147350596, 779.70745714322334, 766.54218081967792, 766.95002202465275, 766.44337185607446, 779.96638978472868, 790.10530375298276, 773.12285296580944, 792.09914433429606, 756.35630479333304, 791.07838964088353, 816.97894652254104, 829.51896022261496, 818.5753704255759, 811.80182900190744, 802.44142036901712, 798.77190432939847, 817.5255904137515, 816.14494433918321, 767.26165977920959, 792.32501972752721, 797.71450536924374, 801.51121235970629, 806.28987046040641, 854.23556331930706, 847.40959750390959, 807.26785434191311, 823.49138478101577, 821.67624120767448, 869.67256677913076, 844.52158618982492, 814.84205626649475, 818.50232928277455, 882.84233859311007, 818.74123964835724, 849.54666009273751, 851.00754812314915, 851.92897999646243, 824.9160051779653, 812.42227276750293, 873.91481846720319, 869.01187899745878, 830.93156281957192, 843.77934056365359, 856.11562916987475, 864.83405166447005, 868.36032678730578, 870.08637500901682, 854.07195324410952, 894.58650632768968, 870.99140446366448, 841.47749920934746, 868.20407400728061, 898.83925862447643, 881.55856926042782, 924.2699299244058, 953.69270598780963, 907.62648488131799, 910.4420310873885, 918.40685419375609, 912.05142784466648, 954.0345408687574, 918.59407009433482, 945.37937559067575, 878.34196225616336, 942.45759500155918, 951.95531623812667, 950.1316608450918, 913.68778060554109, 939.15130091869514], "learn_time": [0.02616721789042155, 0.0299224853515625, 0.026249082883199056, 0.027527014414469402, 0.0285148024559021, 0.026882731914520265, 0.028422300020853677, 0.02870388428370158, 0.026044281323750813, 0.02882371743520101, 0.02764018376668294, 0.025898881753285728, 0.026291048526763915, 0.025629850228627522, 0.024259984493255615, 0.026568432648976643, 0.023753682772318523, 0.025073500474294026, 0.024839750925699868, 0.024212984244028728, 0.024412333965301514, 0.02433846394220988, 0.023429131507873534, 0.02430379788080851, 0.024203582604726156, 0.024002182483673095, 0.02451408306757609, 0.02330628236134847, 0.024098817507425943, 0.024219783147176106, 0.023912199338277183, 0.02378985087076823, 0.024340434869130453, 0.024243414402008057, 0.0247132142384847, 0.02399545113245646, 0.024532000223795574, 0.024883798758188882, 0.023668734232584636, 0.024698833624521892, 0.023668432235717775, 0.024258434772491455, 0.02506121794382731, 0.02412345012029012, 0.023686464627583823, 0.024604499340057373, 0.024910330772399902, 0.02381866375605265, 0.024524184068044026, 0.024409131209055582, 0.022846901416778566, 0.02296633323033651, 0.02489155133565267, 0.02414228121439616, 0.023767149448394774, 0.022190968195597332, 0.02485486666361491, 0.02447993357976278, 0.02311916748682658, 0.024199318885803223, 0.023440635204315184, 0.023729451497395835, 0.02286781867345174, 0.02414998213450114, 0.023420965671539305, 0.024318766593933106, 0.023032402992248534, 0.024360450108846028, 
0.02376393477121989, 0.022790197531382242, 0.02391608158747355, 0.023118650913238524, 0.0235028346379598, 0.02436623175938924, 0.02399028539657593, 0.024283981323242186, 0.023432735602060953, 0.024086900552113853, 0.02154275178909302, 0.02483913501103719, 0.023905750115712485, 0.023539129892985025, 0.02326626777648926, 0.023532565434773764, 0.024018351236979166, 0.023478086789449057, 0.023341151078542073, 0.023059332370758058, 0.02477636734644572, 0.022745283444722493, 0.0236460010210673, 0.02461813688278198, 0.02311856746673584, 0.023263764381408692, 0.024260183175404865, 0.02240963379542033, 0.023940749963124595, 0.02399491866429647, 0.02295376459757487, 0.023248998324076335, 0.02444451649983724, 0.023419129848480224, 0.021763066450754803, 0.025009950002034504, 0.02401706377665202, 0.023624018828074137, 0.0243165651957194, 0.023960800965627034, 0.024322382609049478, 0.023390650749206543, 0.023935115337371825, 0.02399049997329712, 0.023672831058502198, 0.024564619859059653, 0.023805518945058186, 0.02381194829940796, 0.023635701338450114, 0.023835317293802897, 0.024299418926239012, 0.024521251519521076, 0.02332610289255778, 0.024917534987131753, 0.022884082794189454, 0.025269198417663574, 0.025078336397806805, 0.02471101681391398, 0.024113500118255617, 0.023394147555033367, 0.02379851738611857, 0.024035414059956867, 0.02436230182647705, 0.02405218283335368, 0.022036051750183104, 0.024375752607981364, 0.023507916927337648, 0.02438711722691854, 0.02436293363571167, 0.025132997830708822, 0.023256019751230875, 0.02291581630706787, 0.02410548528035482, 0.023649664719899495, 0.025494349002838135, 0.02300706704457601, 0.023185082276662192, 0.024371949831644694, 0.025803216298421226, 0.02187349796295166, 0.024512465794881186, 0.024038267135620118, 0.02404651641845703, 0.023026764392852783, 0.023809266090393067, 0.025545982519785564, 0.02390339771906535, 0.022901701927185058, 0.024690798918406167, 0.024664302666982014, 0.024073747793833415, 0.024058151245117187, 0.024210131168365477, 0.02348086436589559, 0.025017416477203368, 0.02338459889094035, 0.023058815797170003, 0.024586816628774006, 0.02443353335062663, 0.02371309995651245, 0.024570548534393312, 0.024532453219095866, 0.022336014111836753, 0.024260735511779784, 0.024303948879241942, 0.023681116104125977, 0.02509028514226278, 0.022972615559895833, 0.024381852149963378, 0.02245248556137085, 0.02566740115483602, 0.0242347518603007, 0.024165717760721843, 0.02261464993158976, 0.02502696911493937]} -------------------------------------------------------------------------------- /results/speedup/Reacher-v1-1: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.016290152072906496, 0.30143713553746543, 0.3010619680086772, 0.3011153817176819, 0.30104130109151206, 0.3012360652287801, 0.3009444673856099, 0.3014391501744588, 0.3011666496594747, 0.3048012971878052, 0.3028669516245524, 0.30123353401819863, 0.30094961722691854, 0.30135711828867595, 0.3010328491528829, 0.30125603278477986, 0.30070254802703855, 0.3012173016866048, 0.30092251698176065, 0.301254932085673, 0.30088760058085123, 0.3012083808581034, 0.3008607824643453, 0.3010053515434265, 0.3009995500246684, 0.3011852025985718, 0.30063070058822633, 0.3011518200238546, 0.30075741608937584, 0.30122294823328655, 0.30080973307291664, 0.3014404137929281, 0.30106798410415647, 0.30121937990188596, 0.3008480668067932, 0.30121281544367473, 0.3008857329686483, 0.30127458572387694, 0.30098866621653236, 0.30125328302383425, 0.3008435328801473, 
0.30109731753667196, 0.300880233446757, 0.3013292988141378, 0.3008000651995341, 0.30107118288675944, 0.300801948706309, 0.30111443201700844, 0.30093111594518024, 0.3012250145276388, 0.3009326179822286, 0.3010595162709554, 0.3007848660151164, 0.300898818174998, 0.3007766803105672, 0.30089148283004763, 0.3009617686271667, 0.3011609355608622, 0.3005043665568034, 0.3008572498957316, 0.30080200036366783, 0.30125916401545205, 0.3010439316431681, 0.3010810534159342, 0.30092766682306926, 0.3011598825454712, 0.3008389155069987, 0.3012503147125244, 0.29916128317515056, 0.29887248277664186, 0.2985753337542216, 0.29851091702779137, 0.29846460024515786, 0.29890295267105105, 0.29874738057454425, 0.2990871826807658, 0.29887768427530925, 0.2987700303395589, 0.2986351490020752, 0.2988072156906128, 0.2985545833905538, 0.29872336784998577, 0.2986210823059082, 0.2984943191210429, 0.2984796325365702, 0.2988308350245158, 0.29899296363194783, 0.2989691853523254, 0.29839199781417847, 0.2988514026006063, 0.29845026731491087, 0.29883700211842856, 0.2985129992167155, 0.2987803339958191, 0.29852360089619956, 0.29885714848836265, 0.29838751554489135, 0.2981787323951721, 0.2984622319539388, 0.2986976663271586, 0.29847911596298216, 0.29851186672846475, 0.2982566992441813, 0.29844770034154255, 0.298319149017334, 0.2987018346786499, 0.29860433340072634, 0.29882839918136594, 0.29868015050888064, 0.29899561405181885, 0.29833086729049685, 0.2983458360036214, 0.2985255479812622, 0.2986871004104614, 0.298697300752004, 0.2990662336349487, 0.2985466996828715, 0.29874611695607506, 0.2987150510152181, 0.2987220327059428, 0.29882359902064004, 0.29842663208643594, 0.29843586683273315, 0.29875634908676146, 0.2985326806704203, 0.29890824953715006, 0.2983637809753418, 0.29882038036982217, 0.29867265224456785, 0.29865548213322957, 0.2987694303194682, 0.2983816663424174, 0.298114550113678, 0.2986650864283244, 0.29851250251134237, 0.29896661440531414, 0.2986512025197347, 0.29874778588612877, 0.2986345648765564, 0.29870799779891966, 0.2988618493080139, 0.29874089956283567, 0.29871028661727905, 0.2989264527956645, 0.2986997683842977, 0.29822776714960736, 0.2982011675834656, 0.2984843492507935, 0.29862688382466634, 0.29901713132858276, 0.2983360171318054, 0.2986947298049927, 0.29880258242289226, 0.29881053368250526, 0.29876999855041503, 0.29874938329060874, 0.29848068157831825, 0.29862980047861737, 0.2987982670466105, 0.2986253499984741, 0.2986438155174255, 0.2990986148516337, 0.29874748388926187, 0.29872891505559285, 0.2985253651936849, 0.298842982451121, 0.29873868227005007, 0.29890122016270954, 0.2986366311709086, 0.29885511795679726, 0.2986152847607931, 0.29904191891352333, 0.2989092985788981, 0.2986440181732178, 0.29868186712265016, 0.2987949331601461, 0.29881903330485027, 0.2988920489947001, 0.29868350028991697, 0.2988942662874858, 0.29877007007598877, 0.29917086760203043, 0.29878650108973187, 0.29870866537094115, 0.29888776938120526, 0.2986317356427511], "mean_reward": [-103.55733479779428, -105.22487732741145, -100.72883357427497, -97.073033854842691, -93.262759777026233, -89.403049649302858, -86.032435120722965, -83.021353156333745, -80.276778603285678, -77.737690173122971, -74.510168897672088, -71.136150160431853, -69.32524568890328, -65.809119103220709, -63.534622827810217, -61.583882254998606, -59.911130185439724, -58.068487896606875, -55.903736838077535, -53.805970037039295, -52.652796467075589, -50.40690063211585, -48.419903831398329, -46.98753049905627, -45.216666023696405, -43.720015477728623, -42.039066203708131, 
-41.186756604454999, -40.000016721455317, -38.637558624209362, -37.464050422387203, -36.052300102200988, -35.283838563601137, -34.102722823003404, -33.241298740191326, -31.956016226797502, -31.126326211980615, -30.268006361971619, -28.912664299207496, -28.464093053288096, -27.406714377820421, -26.984889894449939, -26.246212092860947, -25.154453950954633, -24.415177602188312, -23.575592835370784, -23.107207926752618, -22.422686409210513, -21.726873742425123, -21.165029817098219, -20.571006382737632, -19.971324569319297, -19.572309903747854, -18.861458715929899, -18.426905105138907, -17.985828334377253, -17.49265577086576, -16.74722587733876, -16.584191538362187, -16.193464102506873, -15.791485465360884, -15.450857940953528, -14.720247991403994, -14.764792093555378, -13.922079562468236, -14.110660962867884, -13.664455965201755, -13.217877929034897, -12.894514880390748, -12.623880995378595, -12.252756751844085, -12.231425253555454, -11.679333282924143, -11.462530587439364, -11.338773013746158, -11.09372622739342, -10.929211494511398, -10.78678729371673, -10.357437372673678, -10.185814478995994, -10.000454596038175, -9.794781234823045, -9.6579282534578486, -9.5552317231751562, -9.3274499726943692, -9.0618489349008531, -8.9428816721751048, -8.8527159613118247, -8.6307534275167548, -8.5686171682977896, -8.3050391768503307, -8.1929745886794745, -7.9506986076086505, -7.7986521488171503, -7.9173451032049487, -7.539130124689267, -7.6460687099718543, -7.4276897776306052, -6.9844180607267843, -6.9442461562933451, -7.0647512369618859, -6.706292791212479, -6.7295501132594842, -6.640079734123157, -6.6657600279047733, -6.4955446054404202, -6.3326562562436406, -6.3818614857045448, -6.321835658062148, -6.4052184917890553, -6.206848465268723, -6.0721985467414132, -6.1092114623778864, -6.1015744542573058, -5.9648601269101125, -5.7159170935996011, -5.8997236142584786, -5.732043691086516, -5.6665847070209887, -5.7194517021131421, -5.3978482575647559, -5.4762684042590211, -5.7799816229623833, -5.6329345351568092, -5.4628052149154671, -5.4146250875842536, -5.4213807398422516, -5.2855302296792264, -5.4874194095506983, -5.1241198909433452, -5.2449355803555813, -5.1722188740396611, -5.3484091461426422, -5.1611224888988545, -4.9965836178661034, -5.0390539957567944, -5.0810188208938918, -5.129424455380371, -5.1286981761636214, -5.0691238988667564, -5.1161042718242777, -5.0876311788293354, -4.9708267065764291, -4.9820124979745977, -4.8348046570392462, -4.8753004586418118, -5.0697877528618669, -4.9510628027587362, -4.9431296082005796, -4.8202651284982183, -4.7938844851200804, -4.7150746239906836, -4.8917406283386908, -4.8375036757105896, -4.6949271811509785, -4.8208040283258864, -4.8321722442008834, -4.7903942673402744, -4.8163366201433515, -4.809538762843073, -4.6753899098702476, -4.7910987952132444, -4.7763280465043128, -4.5660076484873029, -4.9128773809213042, -4.6213068188149329, -4.7169601964041092, -4.7457653951223762, -4.7672628939075805, -4.6568215031743581, -4.7652078831738622, -4.8490787635523658, -4.6087101246560174, -4.605155393761831, -4.4779473194635973, -4.7219176815346868, -4.6554129589403717, -4.6318156182132055, -4.5837587787704512, -4.7311972529233026, -4.5096500648362738, -4.6159011336095279, -4.5000540779072029, -4.5585654238473241, -4.441246435344115, -4.5945845358616486], "learn_time": [0.007093183199564616, 0.024428669611612955, 0.024023783206939698, 0.024288431803385416, 0.023708983262379964, 0.02371660073598226, 0.023957467079162596, 0.025720715522766113, 0.02435876925786336, 
0.024303682645161948, 0.024225167433420815, 0.024084500471750894, 0.02389593521753947, 0.023907633622487386, 0.023599831263224284, 0.024181580543518065, 0.024109268188476564, 0.023901299635569254, 0.023608100414276124, 0.02403618097305298, 0.02434364954630534, 0.024125218391418457, 0.02402704954147339, 0.02368855079015096, 0.023812468846639, 0.024267951647440594, 0.024162383874257405, 0.023568216959635416, 0.024142698446909586, 0.023609832922617594, 0.024438416957855223, 0.023795084158579508, 0.024085799853007, 0.023910315831502278, 0.024018832047780356, 0.024194232622782388, 0.024095582962036132, 0.024129366874694823, 0.023655800024668376, 0.024065216382344563, 0.02425896724065145, 0.024045681953430174, 0.02367449998855591, 0.024093663692474364, 0.023616965611775717, 0.023535748322804768, 0.023849900563557944, 0.02405866781870524, 0.02373513380686442, 0.023774615923563638, 0.023676951726277668, 0.023689715067545573, 0.02364908456802368, 0.02355578343073527, 0.023766549428304036, 0.023888583978017172, 0.023126467068990072, 0.023324350516001385, 0.023047916094462075, 0.023673367500305176, 0.023642881711324056, 0.023764415582021078, 0.023228200276692708, 0.023147149880727132, 0.02356429894765218, 0.02352118492126465, 0.023309266567230223, 0.022887949148813883, 0.023090835412343344, 0.0234523336092631, 0.023405965169270834, 0.024035314718882244, 0.023305284976959228, 0.023725001017252605, 0.023248966534932455, 0.023042682806650797, 0.02342318296432495, 0.02317001422246297, 0.023657480875651043, 0.023089520136515298, 0.023569798469543456, 0.02346491813659668, 0.023728283246358235, 0.023333434263865152, 0.023240697383880616, 0.023712035020192465, 0.023386398951212566, 0.02355591853459676, 0.023155033588409424, 0.02334526777267456, 0.023621050516764323, 0.0236571470896403, 0.023279285430908202, 0.02330756982167562, 0.023250619570414226, 0.023140919208526612, 0.02351464827855428, 0.023228617509206136, 0.023345164457956948, 0.023390432198842365, 0.023223129908243816, 0.02336424986521403, 0.023657735188802084, 0.023364718755086264, 0.02326606512069702, 0.023228867848714193, 0.023665698369344075, 0.02325783173243205, 0.023016965389251708, 0.02323723236719767, 0.02305478652318319, 0.023441569010416666, 0.02374101479848226, 0.023131418228149413, 0.023382417360941567, 0.023334050178527833, 0.023713382085164388, 0.023330132166544598, 0.023257199923197427, 0.023210418224334717, 0.023718233903249106, 0.023063798745473225, 0.023389132817586263, 0.023180298010508218, 0.023393650849660236, 0.023721166451772056, 0.023483399550120035, 0.023550419012705485, 0.023386951287587485, 0.023115734259287517, 0.022911580403645833, 0.023078234990437825, 0.023128199577331542, 0.023495717843373617, 0.02303406794865926, 0.02303876479466756, 0.02329489787419637, 0.02297178506851196, 0.023356449604034425, 0.023835150400797527, 0.02347340186436971, 0.022970934708913166, 0.023378666241963705, 0.023096601168314617, 0.023514151573181152, 0.02305924892425537, 0.023243232568105062, 0.023208852608998617, 0.023355984687805177, 0.023215516408284505, 0.0238955020904541, 0.023539217313130696, 0.02346938451131185, 0.023570001125335693, 0.023322848478953044, 0.02338778575261434, 0.023833847045898436, 0.02338331937789917, 0.02321946620941162, 0.023205498854319256, 0.023634998003641765, 0.02324246565500895, 0.023423548539479574, 0.023654747009277343, 0.02312649885813395, 0.023281848430633544, 0.023159937063852946, 0.02315096855163574, 0.02325416405995687, 0.023237613836924235, 0.023666814963022868, 0.023856818675994873, 
0.023261197408040366, 0.022959001859029136, 0.02326261599858602, 0.023165798187255858, 0.023710735638936362, 0.02325658400853475, 0.023530368010203043, 0.023304800192515056, 0.02331610123316447, 0.023181883494059245, 0.023586801687876382, 0.023584532737731933, 0.023498499393463136, 0.023720335960388184]} -------------------------------------------------------------------------------- /results/try1/Swimmer-v1-1: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.2509508808453878, 0.25050511757532756, 0.2508525172869364, 0.25068073272705077, 0.2507036010424296, 0.2507984677950541, 0.25087621609369914, 0.2510766347249349, 0.2510562499364217, 0.2512484033902486, 0.2512534499168396, 0.2515124837557475, 0.25144296487172446, 0.2513684511184692, 0.25152228275934857, 0.25161768595377604, 0.2516216317812602, 0.25155046780904133, 0.2518354336420695, 0.2518183827400208, 0.25174456437428794, 0.2518635829289754, 0.2520597179730733, 0.252078115940094, 0.25217456420262657, 0.25208705266316733, 0.2518766681353251, 0.25210323333740237, 0.2521912336349487, 0.25230876604715985, 0.2523415327072144, 0.25214921633402504, 0.2525656143824259, 0.2524651010831197, 0.2525039513905843, 0.25252658128738403, 0.2525733470916748, 0.2524592161178589, 0.25280426343282064, 0.25275746583938596, 0.25260106722513836, 0.2525605003039042, 0.25291081666946413, 0.2526890317598979, 0.25289531946182253, 0.2528205474217733, 0.2530490835507711, 0.25294996897379557, 0.2527724027633667, 0.2530275821685791, 0.2529285152753194, 0.252967381477356, 0.2530546188354492, 0.2530394991238912, 0.25322213570276897, 0.25303306579589846, 0.25319845279057823, 0.2530434171358744, 0.2531754493713379, 0.25305613279342654, 0.2532989343007406, 0.2532036304473877, 0.25328871806462605, 0.25336815118789674, 0.25330751736958823, 0.2532074491182963, 0.25329648653666176, 0.2533938646316528, 0.2532884478569031, 0.2534010330835978, 0.25340445041656495, 0.25344763199488324, 0.2533926129341125, 0.25323503017425536, 0.2535740494728088, 0.25350261926651, 0.2534099499384562, 0.2534151315689087, 0.25152473052342733, 0.2512013514836629, 0.25118038256963093, 0.25085320075352985, 0.25089536905288695, 0.2510639150937398, 0.25079126755396525, 0.2511339505513509, 0.25097519954045616, 0.2510188182195028, 0.2510004162788391, 0.251231050491333, 0.2511411666870117, 0.2511720339457194, 0.2511596361796061, 0.2511754989624023, 0.2510932485262553, 0.25105008681615193, 0.2511953830718994, 0.25103019873301186, 0.2510912815729777, 0.251074481010437, 0.25081164836883546, 0.25092218319574994, 0.25107961893081665, 0.2513229330380758, 0.2511026978492737, 0.25093564987182615, 0.250765852133433, 0.2511141498883565, 0.25103431940078735, 0.25122056404749554, 0.25152740081151326, 0.25096993446350097, 0.2511356671651204, 0.25115771691004435, 0.2512896498044332, 0.2511912822723389, 0.2510623812675476, 0.25117030143737795, 0.25088974634806316, 0.25099603335062665, 0.2509312669436137, 0.25116061766942344, 0.25104908148447674, 0.2509332497914632, 0.25101235310236614, 0.25107245047887167, 0.2508919477462769, 0.25103776852289833, 0.2511783162752787, 0.2511093974113464, 0.2511322498321533, 0.25102808078130084, 0.2511700669924418, 0.2506875197092692, 0.2509237845738729, 0.25092314879099525, 0.250976018110911, 0.25080010096232097, 0.25081368287404376, 0.25095478296279905, 0.25088245073954263, 0.2507890184720357, 0.2509555180867513, 0.2508568008740743, 0.2510374824206034, 0.25087746779123943, 0.2506889343261719, 0.2505473494529724, 0.25086288452148436, 
0.2507470528284709, 0.2508651852607727, 0.25083498160044354, 0.2512214660644531, 0.2507532517115275, 0.25115131537119545, 0.25098353226979575, 0.2509558836619059, 0.25102689663569133, 0.25084335009256997, 0.25088321765263877, 0.251116414864858, 0.2509528676668803, 0.25103561878204345, 0.25081191857655843, 0.2509557326634725, 0.25100661913553873, 0.25107288360595703, 0.25088903109232585, 0.2509879986445109, 0.25102736949920657, 0.2511557181676229, 0.2510146180788676, 0.25107044776280724, 0.25094510316848756, 0.2509476661682129, 0.25087281862894695, 0.2510944167772929, 0.2510995348294576, 0.2509424646695455, 0.2509541471799215, 0.25109933217366537, 0.2508119821548462, 0.25113293329874675, 0.2508981148401896, 0.2510261336962382, 0.25089905261993406, 0.2509376009305318, 0.2511857827504476, 0.2513476689656576, 0.2513850649197896, 0.25112549861272176, 0.25123629967371625, 0.2511590480804443, 0.25107113520304364, 0.25122951666514076, 0.2510010321935018, 0.25089348554611207, 0.25118613640467324, 0.25154251654942833, 0.25121681292851766, 0.2511860529581706], "mean_reward": [3.6474021241462515, 3.3810848706776824, 8.6537927248172366, 20.650903586791177, 24.749135318476995, 28.382537502263613, 34.40306832792421, 32.585295637914832, 41.382374218097048, 38.532199029162506, 41.689943135453021, 40.824820179360053, 36.958437162413631, 43.590524211054507, 45.072661825151414, 40.73191390220633, 39.542624205806433, 35.301368978656143, 34.805826636042219, 35.581207760886564, 36.978278326094539, 35.84636513472249, 38.387004967717715, 38.765423793760455, 38.697823619585719, 39.224234113412429, 39.108085661209401, 41.132830146446949, 40.804557861662104, 39.694179345071646, 40.279402204543558, 42.873232257434907, 40.935565944180382, 43.129339300807509, 39.960005075660078, 41.059636055208799, 41.825247080056208, 42.71558409955361, 41.521422054106459, 41.06306618571643, 43.645510518788591, 43.963795037888033, 42.168450156234918, 41.200023835413674, 42.943648583508761, 43.760465121301181, 42.660814374871691, 45.213313655787275, 45.327602708868817, 42.842486512667101, 41.963599537030966, 44.310117757186873, 41.450334762931661, 42.59041658790548, 41.950803414230215, 42.039288496933743, 42.216707686472674, 44.784292949386348, 44.930046053430416, 45.77060999240517, 45.31623126385108, 45.774474337693029, 45.880983562446623, 45.293551873160155, 45.308421139773152, 46.155536162605713, 46.129724114442993, 46.478522598657435, 45.71866096262444, 45.136866430910963, 47.417181306728807, 46.698147589675386, 45.599710622772918, 47.093732771946847, 47.830646056712169, 48.686398696407643, 48.949325557297527, 48.760770670651837, 48.477816158454672, 48.684700397671023, 49.415956256381165, 49.911135904746551, 49.412570518985682, 50.696512861367836, 50.585508298320789, 52.031017522721768, 50.851544867312278, 50.023905622429751, 52.795104352541856, 51.501613181020979, 51.210848391104129, 52.232458998354147, 53.316370181095373, 54.289747743978957, 52.038667065445928, 53.655350848709659, 53.991180269090748, 55.474362694324626, 55.623721058284687, 56.668637749967914, 56.356362855853561, 58.608950373344122, 59.14076996878994, 59.230717252195333, 57.336928465230947, 59.989932026181613, 62.005591458128904, 60.166286069309386, 59.563426028194428, 62.696559478659495, 61.057283465621005, 61.692359035127389, 65.475709195053398, 64.795168313542476, 65.61266436110337, 66.951704679014853, 68.467798542179338, 67.914648366591209, 70.356577999999445, 71.059759082533418, 74.668363404215455, 74.363708554212195, 73.031363572182869, 75.080801726222717, 
77.307720845883395, 79.299227262918166, 79.433208789692827, 81.369836771304321, 84.261844407187567, 87.539089580609215, 84.936120151512952, 89.202281113212962, 87.791104297414023, 95.482728031602136, 90.925787852094189, 95.099568790267739, 94.134735574035503, 95.345928217591251, 98.171482342219008, 101.35136948332476, 99.8016377597651, 97.842705506645927, 99.368686471303505, 98.323590850716613, 102.68094758203213, 106.40346441455328, 106.98245154085927, 104.47959576104441, 107.12467554629875, 105.26966051125926, 104.56411049460071, 106.21088214258687, 108.55323200171946, 109.7239170162084, 109.2678728973423, 105.72408731253056, 110.24432679660541, 110.10275875819184, 106.91226419597044, 109.87323428775821, 111.93900288237572, 112.24653925144696, 111.37711116839543, 110.74470549282822, 111.87195634054733, 111.65187593657924, 110.78801300332479, 112.88550153587323, 111.8763957641996, 113.59995484128771, 112.18272799115357, 109.93741268289958, 112.50010924410326, 113.39056019625434, 114.29463463942065, 114.78269297391473, 111.55004674345375, 112.21391579362725, 114.04963243122874, 113.14938809935522, 114.76752943682224, 114.45918772019812, 113.42682823899456, 115.42119694517865, 116.66101230437236, 115.58583509428888, 117.26866458464046, 118.42274797552209, 114.81551949300908, 113.16235753476344, 113.96431186609689, 116.68942740412676, 115.48488630243187, 116.36081680085366, 116.06065331200088, 115.79355428484186, 115.5080705302228, 113.66277044694485, 113.74949699757296, 115.36799008785188, 116.44592522049149], "learn_time": [0.024546229839324953, 0.02370216449101766, 0.02352561553319295, 0.023237550258636476, 0.023296332359313963, 0.023286068439483644, 0.02321328322092692, 0.023292736212412516, 0.023516766230265298, 0.023475865523020428, 0.02307544946670532, 0.023419682184855142, 0.02335145076115926, 0.023174583911895752, 0.023092782497406004, 0.023474701245625815, 0.023294464747111002, 0.023223567008972167, 0.023337384064992268, 0.02303078571955363, 0.02318089803059896, 0.023471081256866456, 0.02314271926879883, 0.023131847381591797, 0.023522432645161948, 0.023029935359954835, 0.023371164004007974, 0.023492483297983806, 0.02308839956919352, 0.02325523296991984, 0.02339108387629191, 0.023430252075195314, 0.022820882002512612, 0.023190534114837645, 0.023707886536916096, 0.023044367631276447, 0.02324998378753662, 0.023430697123209634, 0.02347806692123413, 0.02283173402150472, 0.023709551493326823, 0.023209214210510254, 0.02333063284556071, 0.022746165593465168, 0.023164017995198568, 0.02329946756362915, 0.02318759759267171, 0.023004082838694255, 0.023307184378306072, 0.02333626349767049, 0.023153150081634523, 0.023226932684580485, 0.023423234621683758, 0.023317416508992512, 0.022893750667572023, 0.023387384414672852, 0.02365090052286784, 0.02336738109588623, 0.023735463619232178, 0.023756198088328042, 0.023556617895762126, 0.023767884572347006, 0.02339145342508952, 0.023318219184875488, 0.02330331802368164, 0.02354348103205363, 0.02318011919657389, 0.023306198914845786, 0.023580265045166016, 0.023270400365193684, 0.023164947827657063, 0.023366248607635497, 0.023769299189249676, 0.023152252038319904, 0.023077364762624106, 0.02373063564300537, 0.023033034801483155, 0.02305061419804891, 0.02300551732381185, 0.023184951146443686, 0.023604698975880942, 0.02324980099995931, 0.02363655169804891, 0.02292523384094238, 0.023759432633717856, 0.023357085386912026, 0.023494215806325276, 0.023077317078908286, 0.023372666041056315, 0.02349844773610433, 0.022827565670013428, 0.023540250460306802, 
0.02342221736907959, 0.023287697633107503, 0.023878232638041178, 0.023287717501322427, 0.023657063643137615, 0.023574197292327882, 0.02335249980290731, 0.023298450311024985, 0.023785614967346193, 0.02351541519165039, 0.023725136121114095, 0.02357738415400187, 0.02320846716562907, 0.023265500863393147, 0.023526298999786376, 0.02334821621576945, 0.023348251978556316, 0.023812750975290935, 0.02332388162612915, 0.023174333572387695, 0.023574280738830566, 0.023443981011708578, 0.02319688399632772, 0.023388083775838217, 0.023350048065185546, 0.023357383410135903, 0.02350458304087321, 0.023338099320729572, 0.023476465543111166, 0.023638534545898437, 0.023306365807851157, 0.02358321746190389, 0.02306308348973592, 0.023251819610595702, 0.023134148120880126, 0.023498018582661945, 0.023264082272847493, 0.023600518703460693, 0.023455798625946045, 0.02346669832865397, 0.023540715376536053, 0.023111883799235025, 0.023192032178243, 0.023445498943328858, 0.02368541955947876, 0.023211483160654703, 0.023124818007151285, 0.02322628100713094, 0.02309579849243164, 0.023859949906667073, 0.02318253517150879, 0.023502683639526366, 0.023578699429829916, 0.022962482770284016, 0.023141499360402426, 0.022823584079742432, 0.023075334231058755, 0.023105219999949137, 0.023396066824595132, 0.023097232977549235, 0.02358189821243286, 0.023386017481486002, 0.023140867551167805, 0.023515049616495767, 0.023195465405782063, 0.0231518824895223, 0.023494267463684083, 0.023344465096791587, 0.023263716697692872, 0.023016432921091717, 0.023439701398213705, 0.023418680826822916, 0.023142731189727782, 0.023364285628000896, 0.023380549748738606, 0.023505000273386638, 0.02333758274714152, 0.023432433605194092, 0.023332619667053224, 0.023415382703145346, 0.023089281717936196, 0.0235710342725118, 0.023135832945505776, 0.023339633146921793, 0.02346165180206299, 0.023206651210784912, 0.023541216055552164, 0.02313078244527181, 0.02352861563364665, 0.023751819133758546, 0.02371738354365031, 0.02285145123799642, 0.02320340077082316, 0.02306908369064331, 0.022937798500061037, 0.02293303410212199, 0.023063000043233237, 0.023601833979288736, 0.022946250438690186, 0.02332666317621867, 0.023103765646616616, 0.02308800220489502, 0.02310598293940226, 0.023362632592519125, 0.024089086055755615, 0.023257402578989665, 0.023406000932057698, 0.023162682851155598, 0.022955532868703207]} -------------------------------------------------------------------------------- /results/try1/Reacher-v1-1: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.29616591533025105, 0.2956587155659994, 0.2955677350362142, 0.2956298828125, 0.2957342187563578, 0.2956055998802185, 0.29556839863459267, 0.29558934768040973, 0.29581089814503986, 0.2955086668332418, 0.2957978844642639, 0.2958856701850891, 0.2955107847849528, 0.29557036558787025, 0.29566198190053306, 0.2956874966621399, 0.29570398330688474, 0.2957133849461873, 0.29565308094024656, 0.29570123354593914, 0.2958657185236613, 0.2955397168795268, 0.2957093834877014, 0.29576644897460935, 0.29582083225250244, 0.2957133849461873, 0.29556018511454263, 0.2955674489339193, 0.29553356965382893, 0.29574923515319823, 0.2955858826637268, 0.29545643329620364, 0.2956277330716451, 0.2956144690513611, 0.2956282138824463, 0.2956283489863078, 0.295600163936615, 0.29556778271993, 0.29561954736709595, 0.29545739889144895, 0.2958430488904317, 0.2956061363220215, 0.29557255109151204, 0.29566933314005533, 0.2955672820409139, 0.2956366976102193, 0.29544243415196736, 0.29542323350906374, 
0.29555109739303587, 0.29537655115127565, 0.2956065813700358, 0.2957647363344828, 0.2954418977101644, 0.2954742153485616, 0.29554475148518883, 0.2957034508387248, 0.2954898476600647, 0.29545774857203166, 0.2955837686856588, 0.29532054662704466, 0.29540183146794635, 0.2954087495803833, 0.2954029162724813, 0.2955434521039327, 0.2956604520479838, 0.295362651348114, 0.29531356493632, 0.29528308312098184, 0.29560216665267947, 0.2949937303860982, 0.2953013857205709, 0.29514636993408205, 0.29556981722513836, 0.2954467495282491, 0.29590131441752116, 0.2952387491861979, 0.2953335682551066, 0.29521453380584717, 0.29534003337224324, 0.29536478519439696, 0.2952044367790222, 0.2951907674471537, 0.29549820025761925, 0.29533326625823975, 0.29523799816767377, 0.29530548652013144, 0.29531104962031046, 0.2934518853823344, 0.29363763332366943, 0.29324055115381875, 0.2928613305091858, 0.2927734653155009, 0.2930726806322734, 0.29281933307647706, 0.29289019902547203, 0.29260733524958293, 0.2929267326990763, 0.29278064966201783, 0.29289588530858357, 0.2929435849189758, 0.2927054166793823, 0.2926982800165812, 0.29267934958140057, 0.29281179904937743, 0.2931545654932658, 0.29298950036366783, 0.29279921849568685, 0.2928656339645386, 0.29248295227686566, 0.2924776355425517, 0.29281659921010333, 0.29273620049158733, 0.2929309010505676, 0.29278274774551394, 0.29288079738616946, 0.293183434009552, 0.2927787184715271, 0.29258528153101604, 0.29302321672439574, 0.2931160489718119, 0.29312680164972943, 0.29298179944356284, 0.2928459842999776, 0.29255024989446005, 0.2926412026087443, 0.29296907981236775, 0.2930881500244141, 0.29273481766382853, 0.2927292466163635, 0.29297588268915814, 0.2927322824796041, 0.29267613490422567, 0.29292758703231814, 0.29240548610687256, 0.2925800681114197, 0.29260465304056804, 0.2925909996032715, 0.29306551615397136, 0.29294211864471437, 0.2927402853965759, 0.2928285678227743, 0.2926735480626424, 0.29280091921488444, 0.2928835352261861, 0.29312653144200645, 0.29298078616460166, 0.2930232008298238, 0.2928315162658691, 0.2930315335591634, 0.29275370041529336, 0.2927752137184143, 0.292986003557841, 0.29293373425801594, 0.2927563508351644, 0.2927804827690125, 0.29275306860605876, 0.29292321602503457, 0.2926416158676147, 0.29272956450780235, 0.29257931709289553, 0.2926892161369324, 0.29293216864267985, 0.2929742177327474, 0.29280248483022053, 0.29258085091908775, 0.29274914662043255, 0.29268773396809894, 0.29300595124562584, 0.2927413821220398, 0.29290560086568196, 0.2929376482963562, 0.2923908511797587, 0.2925735354423523, 0.292773965994517, 0.29315586884816486, 0.29255831638971963, 0.29274798234303795, 0.2927450656890869, 0.2931130170822144, 0.29268268346786497, 0.2927360693613688, 0.29288973410924274, 0.29280308485031126, 0.2928363005320231, 0.2928752700487773, 0.2926988164583842, 0.2927255670229594, 0.29319603045781456, 0.29280041456222533, 0.2929165323575338, 0.2928707162539164, 0.29303531646728515, 0.2925852656364441, 0.29253979921340945, 0.2929147164026896, 0.2930206815401713, 0.2927938501040141, 0.2925676663716634, 0.2926737467447917, 0.29285996754964194], "mean_reward": [-109.06995450332025, -104.03422527665626, -100.67751969735207, -97.399331263534862, -92.866198659211406, -89.400660466489498, -85.969843727988703, -82.768993644217844, -80.281258464042736, -76.692295415406221, -74.31850500845772, -71.794073060112339, -68.526352233634867, -67.020507518415627, -64.061527922546134, -60.969488814982597, -59.13090176986978, -57.179971518020814, -55.454640570711632, -53.539642012967541, 
-51.64560790630712, -49.68664482502723, -48.549472868387582, -46.769678726323242, -45.27741727462697, -43.664571897105645, -41.899462691547399, -40.801505095363737, -39.873265134902979, -38.596453489477646, -37.032881771817827, -35.940273461164068, -34.983262451023393, -33.407669135123903, -32.542178878433369, -31.843147354593128, -30.838025034887714, -29.745485017763848, -28.742852803309511, -28.280913239710198, -27.45035773031876, -26.254415510167668, -25.655629774035624, -24.768856819769283, -24.178763497460306, -23.565672459990665, -22.78416341722491, -22.299884282218677, -21.495220388711324, -20.975839490294302, -20.20250004148804, -19.936620137711159, -19.416766449686978, -18.660044456456248, -18.309965608434691, -17.807668989303593, -17.206723853242107, -16.448493225231786, -16.279536745864689, -15.826101851659702, -15.679985639509999, -14.954412928232703, -14.550091397972487, -14.054932444184395, -13.803555785344166, -13.646624870464915, -13.264088739565079, -13.012590732088469, -12.768583873015181, -12.209600590987867, -12.07097430546257, -11.75141110386058, -11.585509759796587, -11.163701570483463, -11.063420903644683, -10.646360043649516, -10.542844829956248, -10.420014515115472, -10.305738582145336, -9.8413574798926611, -9.5864274411948855, -9.5962325788938685, -9.515252462673347, -9.3086733463729328, -9.2104165440051009, -8.9037704408642888, -8.7901487981781798, -8.5138833622501355, -8.3583365835636911, -8.1299717277279075, -8.0658303737946273, -7.9529205829646319, -7.8234681909374855, -7.6409939976939745, -7.3377376959145248, -7.4492975541364759, -7.2701206530001583, -7.1527652139389737, -6.9650320035295428, -7.0470429570123159, -6.7989334177611136, -6.67955063027925, -6.6171039411660573, -6.5410465593395566, -6.4646771147847559, -6.4428630032315235, -6.382392911135339, -6.2548025435502828, -6.1795706025411015, -6.1645820763744545, -5.9722225605325958, -5.8353791486404205, -5.8300051061178415, -5.9003247117021189, -5.828754337969376, -5.7458376038663417, -5.6500542375943157, -5.5768130975339352, -5.5060570866017784, -5.659320125302993, -5.525786497575659, -5.4070902656295834, -5.2299930317176724, -5.2843683535369728, -5.2765460970350002, -5.1542064563513712, -5.1966585809776209, -5.1689576368838042, -5.2254901576439234, -5.23484220103576, -5.1980142259447923, -5.1328082220273537, -5.0759143279120682, -4.7793495867284266, -5.0265884546024377, -4.9063648084168374, -5.1161925589708792, -5.0368206338031172, -4.9406477900150625, -4.9933624990934087, -4.9948000493886182, -4.8568289984939561, -4.7892392302594295, -4.8584929688996974, -4.8107024821728661, -4.774614595665124, -4.8733396340564861, -4.7679535271005804, -4.8220330300403962, -4.6829661325956105, -4.6752289148974224, -4.7996645217068679, -4.5908961301583044, -4.9160074858229086, -4.6237075487447186, -4.7893032722922797, -4.627621861847854, -4.6930914801554682, -4.6281724317831898, -4.7611713039081369, -4.5794989881022765, -4.6448492526273455, -4.6732131240582486, -4.6216945351008061, -4.5147558229168281, -4.588447730967002, -4.5675682356455241, -4.4749505788387829, -4.4281227555821676, -4.5528318334758868, -4.4778570190554907, -4.515417709117278, -4.3936071489829773, -4.5662922600348779, -4.4734169571556306, -4.5235360919692429, -4.5581523614363411, -4.5482566262787438, -4.5503082372309196, -4.5779064742496924, -4.5669348328042396, -4.5927558277019784, -4.5218855689654696, -4.3415211095591379, -4.4468078130795394, -4.317419505462464, -4.5200440289407817, -4.5746146466119555, -4.3894637429514933, -4.4356176096311435, 
-4.5307994311228672, -4.4685100924668983, -4.3748367677869444, -4.441498032996269, -4.4837479711386434, -4.2158501029239339, -4.4363242564648315, -4.2902319821185584, -4.4032852703874452, -4.4627107340025081], "learn_time": [0.02372551759084066, 0.023105168342590333, 0.023068686326344807, 0.022318732738494874, 0.02284855047861735, 0.02258046865463257, 0.02382949988047282, 0.023034981886545815, 0.02292951742808024, 0.02239941358566284, 0.02322621742884318, 0.022716498374938963, 0.022551349798838296, 0.022606281439463298, 0.022708284854888915, 0.022436916828155518, 0.022931249936421712, 0.02278428077697754, 0.02268509864807129, 0.022624731063842773, 0.02234816551208496, 0.022560580571492513, 0.02272916634877523, 0.022623217105865477, 0.022773452599843345, 0.02248455286026001, 0.02271269957224528, 0.022436614831288657, 0.022742366790771483, 0.02291428248087565, 0.022596784432729087, 0.022135881582895915, 0.022395380338033042, 0.022746217250823975, 0.02270854711532593, 0.022581319014231365, 0.022521400451660158, 0.022725284099578857, 0.02289948066075643, 0.02254321575164795, 0.02264358599980672, 0.022691682974497477, 0.02274903456370036, 0.02277574936548869, 0.022644368807474773, 0.022587164243062337, 0.022472453117370606, 0.02236619790395101, 0.02278521458307902, 0.02273950179417928, 0.02253599961598714, 0.02305490175882975, 0.022898701826731364, 0.022577333450317382, 0.02256925106048584, 0.02279661496480306, 0.022512447834014893, 0.022895514965057373, 0.02262279987335205, 0.022916114330291747, 0.022932235399881998, 0.022749551137288413, 0.022533667087554932, 0.023003133138020833, 0.02291330099105835, 0.022899969418843587, 0.022674365838368734, 0.022832000255584718, 0.022483650843302408, 0.022603766123453776, 0.022594368457794188, 0.022878801822662352, 0.02293663422266642, 0.023102649052937827, 0.02275019884109497, 0.02247490088144938, 0.022647984822591147, 0.022778149445851645, 0.02269105116526286, 0.02278865178426107, 0.022518301010131837, 0.022666784127553304, 0.022540950775146486, 0.022538181145985922, 0.022983102003733318, 0.022736382484436036, 0.022842232386271158, 0.022635034720102944, 0.022643049558003742, 0.022792899608612062, 0.02228320042292277, 0.02270668347676595, 0.022847867012023924, 0.02293030023574829, 0.02270503044128418, 0.022879799207051594, 0.022344382603963216, 0.02241521676381429, 0.023047415415445964, 0.022464752197265625, 0.023068817456563313, 0.02264556884765625, 0.022626634438832602, 0.022514732678731282, 0.0226889967918396, 0.022391080856323242, 0.022604997952779135, 0.022324899832407635, 0.022554667790730794, 0.022615265846252442, 0.022790817419687907, 0.02294268210728963, 0.022905214627583822, 0.02240593433380127, 0.02280523379643758, 0.023060067494710287, 0.02307215134302775, 0.022920866807301838, 0.022640299797058106, 0.02247688372929891, 0.022393580277760824, 0.02292936642964681, 0.022282747427622478, 0.022683350245157878, 0.022599581877390543, 0.022770333290100097, 0.02288358211517334, 0.022541415691375733, 0.022812883059183758, 0.02237321933110555, 0.0228028138478597, 0.022595314184824626, 0.023316832383473714, 0.02283715009689331, 0.022621786594390868, 0.022850616772969564, 0.02312499682108561, 0.022786414623260497, 0.022778415679931642, 0.022649947802225748, 0.022680532932281495, 0.02292999823888143, 0.022456681728363036, 0.022897863388061525, 0.02252551317214966, 0.023055768013000487, 0.02261449893315633, 0.022754216194152833, 0.022732114791870116, 0.022575946648915608, 0.022397565841674804, 0.022871665159861245, 0.022908584276835123, 
0.022898515065511067, 0.022518332799275717, 0.02303996483484904, 0.022966015338897704, 0.022385799884796144, 0.022275833288828532, 0.022738381226857503, 0.02257716655731201, 0.022493267059326173, 0.022861166795094808, 0.022466119130452475, 0.02268604834874471, 0.02243343194325765, 0.022575016816457114, 0.02275139888127645, 0.022710633277893067, 0.02241438627243042, 0.02250063419342041, 0.022868351141611735, 0.022616851329803466, 0.02291238307952881, 0.022869968414306642, 0.022593549887339272, 0.02267351547876994, 0.022671282291412354, 0.0228721817334493, 0.022502315044403077, 0.02268619934717814, 0.02282419999440511, 0.022863499323527017, 0.022617781162261964, 0.022668115297953286, 0.022902286052703856, 0.022974284489949544, 0.022223166624704995, 0.022390516599019368, 0.022499966621398925, 0.022538316249847413, 0.022768501440684, 0.02281554937362671, 0.02255203326543172, 0.022829349835713705, 0.022802849610646565, 0.023062352339426676, 0.022592798868815104, 0.022624631722768147, 0.022486631075541177]} -------------------------------------------------------------------------------- /results/try1/Reacher-v1-5: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.08599271774291992, 0.08390081723531087, 0.08398046890894571, 0.08322778145472208, 0.08334093491236369, 0.08316266934076945, 0.08329355319341024, 0.08340601523717245, 0.0833422859509786, 0.08330686887105306, 0.08334254821141561, 0.08346466620763143, 0.08341871500015259, 0.08398136695226034, 0.08335704803466797, 0.08332928419113159, 0.08326966762542724, 0.0832259178161621, 0.08345381816228231, 0.08323314984639486, 0.083228600025177, 0.08314978281656901, 0.08335011800130208, 0.08343584934870402, 0.08321374654769897, 0.0831835150718689, 0.08334731658299764, 0.08349639972050985, 0.08330638408660888, 0.08325216372807821, 0.08312116861343384, 0.08322498401006063, 0.08339200019836426, 0.08333055178324382, 0.08331086635589599, 0.08349815209706625, 0.08346506754557291, 0.083279550075531, 0.08319313128789266, 0.08328890005747477, 0.0832671324412028, 0.0831846316655477, 0.0832839806874593, 0.0832500139872233, 0.0833444356918335, 0.08310346603393555, 0.08323254982630411, 0.08329861958821615, 0.08312998612721761, 0.08323591550191244, 0.08326921463012696, 0.08327724933624267, 0.08326410055160523, 0.08326893250147502, 0.08355246384938558, 0.08322525024414062, 0.08313371737798055, 0.08326761325200399, 0.08330881595611572, 0.08325598239898682, 0.08321719964345296, 0.0831923802693685, 0.08321125109990438, 0.08329063256581624, 0.08330536683400472, 0.08324756622314453, 0.08321961959203085, 0.08318061828613281, 0.08332535028457641, 0.0831458330154419, 0.08319125175476075, 0.08321579694747924, 0.08348833322525025, 0.08317698240280151, 0.08330133358637491, 0.08322738409042359, 0.08325931628545126, 0.08330649932225545, 0.08328959941864014, 0.08318803310394288, 0.08329028685887654, 0.08334054946899414, 0.0832319974899292, 0.08313493331273397, 0.0834652304649353, 0.08326089779535929, 0.08324748277664185, 0.08330556551615396, 0.0833056648572286, 0.08325544993082683, 0.08315479755401611, 0.08316346406936645, 0.08319424788157145, 0.08325506846110026, 0.0833633303642273, 0.08324578205744425, 0.08313514788945515, 0.08337796529134114, 0.08315213123957316, 0.08316676616668701, 0.08309296369552613, 0.08336489995320638, 0.08315123319625854, 0.08321728308995564, 0.08314103285471598, 0.08327054977416992, 0.08379961649576823, 0.0832102656364441, 0.0831358830134074, 0.08314375082651775, 0.08305725256601969, 
0.08314100106557211, 0.08320353428522746, 0.08313913345336914, 0.08314103285471598, 0.08330545028050741, 0.08330994844436646, 0.08330659866333008, 0.08329946994781494, 0.0832001010576884, 0.08333776791890463, 0.08312665224075318, 0.08337655067443847, 0.08331530094146729, 0.0833096981048584, 0.08330405155817668, 0.08318300247192383, 0.08327333529790243, 0.08336515029271443, 0.08322566747665405, 0.08315668106079102, 0.08343579769134521, 0.08303078413009643, 0.08316778341929118, 0.08313168287277221, 0.08320409854253133, 0.08325883150100707, 0.08325289885203044, 0.08322941859563192, 0.08329108556111654, 0.08325628439585368, 0.08318529923756918, 0.08313301801681519, 0.08313528299331666, 0.08313921689987183, 0.08319501479466757, 0.08324383099873861, 0.08341120084126791, 0.08319131533304851, 0.08324931859970093, 0.08332739671071371, 0.08350019852320353, 0.08330154816309611, 0.08318185011545817, 0.08323780298233033, 0.08304154872894287, 0.08313153584798177, 0.08327099879582724, 0.08314121961593628, 0.08313285112380982, 0.08320316473642984, 0.083071498076121, 0.08306448459625244, 0.08339581886927287, 0.08321468432744344, 0.08302443424860637, 0.08336856365203857, 0.08304404815038045, 0.08317065238952637, 0.08326345284779867, 0.08327601750691732, 0.08318384885787963, 0.08324979941050212, 0.08309963544209799, 0.08318835099538167, 0.08322441975275675, 0.08303939898808797, 0.08326958417892456, 0.08314750194549561, 0.0832614819208781, 0.08320183356602986, 0.08313456773757935, 0.08316199779510498, 0.08313250144322713, 0.0829772154490153, 0.08339850107828777, 0.08310401837031046, 0.08312928279240926, 0.08301801681518554, 0.08337968587875366, 0.0830854336420695, 0.08307763338088989, 0.08312739928563435, 0.08310871918996175, 0.0830544670422872, 0.0830432653427124, 0.08317598104476928, 0.08318898677825928, 0.08301080067952474, 0.08340229988098144], "mean_reward": [-106.33521010317865, -105.07273273832715, -99.377818405725577, -97.618785708784017, -93.561443021535325, -91.292243937607012, -85.947009730091722, -85.047872310210309, -80.72985774309825, -76.534212565320388, -74.320293668563025, -72.938872163432677, -68.213462330897983, -65.44491883356379, -63.632142185236901, -64.56011222087362, -60.980399350221653, -58.468289201755375, -57.770713543655404, -54.845615815933762, -54.800026637800073, -50.727498775071936, -49.636665159076671, -47.948367708563211, -46.860096766751603, -46.008322702300923, -44.147361333609723, -42.886834855367425, -41.741118231194065, -40.332315695453751, -39.194531730390153, -38.786928978816988, -36.909090087372327, -36.298615451307434, -34.04599367094319, -33.513938945171773, -32.170178199285367, -31.947623053247344, -30.396763297833623, -29.707888128298091, -29.459550952467797, -28.760198945776402, -27.525108124690075, -26.670830046985515, -25.966318038550916, -25.55227619651232, -24.514551201204913, -24.068420133538385, -23.578350804842302, -23.16998912897304, -22.339032112992427, -21.935086609555245, -20.778761035398258, -20.729873608683747, -19.83858137198963, -19.273564003006229, -18.599451690463535, -18.264453668108548, -17.65169098321519, -17.435215679040518, -16.859592394217863, -16.180317115442623, -15.938012116550876, -15.941824778294585, -15.590421997853722, -14.680335209564394, -14.367122365399283, -14.421302221323248, -14.107160833518513, -13.656587754072884, -13.129116399739212, -12.977579586861719, -12.515258441445409, -12.448640847826059, -12.006297595171208, -11.585324352027586, -11.625208343209197, -11.389389921110007, -11.153000773865072, -10.693476507339229, 
-10.696477033134975, -10.580143474136216, -10.296289176479979, -10.138214203100567, -9.7721560639397289, -9.6004136511713725, -9.4941866216766524, -9.2298736068115232, -9.1767733511048935, -8.8355830477065691, -8.7029213862659631, -8.8128010163679242, -8.4548401071832817, -8.5093965693097182, -8.2805645839970321, -8.1663929834149513, -7.7964273216409037, -7.933180705357481, -7.7336605336393109, -7.6051519846397913, -7.5221688207037003, -7.5036717733456886, -7.3556033262095957, -7.1601709606628035, -7.0925979917109396, -7.1150513112495979, -6.8162551196115437, -7.01373951164795, -6.5819138484374164, -6.6928753292306826, -6.5356600127866784, -6.4325241834221663, -6.4517391719262154, -6.3992033034768747, -6.1707812415916985, -6.1297625847982866, -6.0305674589992222, -5.8926141519745876, -5.7240407974846539, -5.992296132825544, -5.766845988571986, -5.8577278232134606, -5.7487543677442838, -5.6797347267334457, -5.5936571949410911, -5.579097865870426, -5.5925198678888215, -5.4441760257960299, -5.5276127905803811, -5.5079806967350171, -5.3679791802128953, -5.4245011153341895, -5.2945942616245167, -5.3158700521362139, -5.3517328974484322, -5.2519305626465078, -5.0992239195996127, -5.07347271633546, -5.1186855550468318, -5.2355137227267505, -5.0795427319527633, -5.0971681934000177, -4.9587620237892054, -5.0952094188819483, -5.1579827553496909, -4.8972460836641059, -4.9394319484313387, -4.9569107828685448, -4.87752516620478, -4.8509587160254402, -5.0208853209427993, -4.9499580050223422, -5.0406191849309865, -4.8210859265283652, -4.8541108366357024, -4.9075088844523007, -4.8711355511031948, -4.863053993090876, -4.8689984752077047, -4.7585346047023203, -4.8504152537834457, -4.7056703694871729, -4.8418282674532964, -4.7693421851582034, -4.7999320777844385, -4.5824813390555859, -4.8522585330412022, -4.7425139967072001, -4.5703112866476792, -4.6550587221675634, -4.6085211357705385, -4.6574259126145732, -4.7504405126797389, -4.6465809651184706, -4.8388134158895113, -4.6914987685197591, -4.6037942210476102, -4.6934160618719787, -4.7307830756283407, -4.6403509629106239, -4.7746656329638668, -4.6168290444353799, -4.5026000197160787, -4.6581832989632561, -4.4893783817444008, -4.5611366906981523, -4.5754558114074557, -4.551073721386711, -4.7023573086805097, -4.5436897202369311, -4.5139353858556817, -4.5667348334379287, -4.7501900666008492, -4.5590969623194448, -4.6572657396831882, -4.4475032302846458, -4.4968373715440757, -4.6584842603435934, -4.5688634992431121, -4.522367663440459], "learn_time": [0.02396226723988851, 0.022908882300059, 0.023282432556152345, 0.022693347930908204, 0.022819832960764567, 0.02301419973373413, 0.022608466943105063, 0.02404925028483073, 0.02269195318222046, 0.0227169672648112, 0.02249011993408203, 0.0227811336517334, 0.02278250058492025, 0.022502481937408447, 0.022645131746927897, 0.02282926638921102, 0.02279846668243408, 0.02314000129699707, 0.022377185026804605, 0.023065948486328126, 0.02253444989522298, 0.022857216993967693, 0.0226436177889506, 0.023103749752044676, 0.02253448168436686, 0.02257143259048462, 0.022421367963155112, 0.0224440336227417, 0.02263106902440389, 0.022981715202331544, 0.02267536719640096, 0.02282868226369222, 0.02276931603749593, 0.022516218821207683, 0.022606499989827476, 0.022499815622965495, 0.022774600982666017, 0.022672033309936522, 0.022501750787099203, 0.02310754855473836, 0.022582236925760904, 0.022766534487406412, 0.022582614421844484, 0.023086766401926678, 0.022222050031026206, 0.022797898451487223, 0.02235014835993449, 0.02291738192240397, 
0.022605252265930176, 0.02269738515218099, 0.02288541793823242, 0.022350752353668214, 0.022861015796661378, 0.02269919713338216, 0.022637581825256346, 0.0227839986483256, 0.022690816720326742, 0.02252438465754191, 0.02288510004679362, 0.023099116484324136, 0.022500999768575034, 0.022434333960215252, 0.02270239988962809, 0.023079848289489745, 0.022451281547546387, 0.022825316588083903, 0.022929084300994874, 0.022861850261688233, 0.022568015257517497, 0.022718032201131184, 0.022524535655975342, 0.022566433747609457, 0.022717630863189696, 0.022576518853505454, 0.022329485416412352, 0.022732249895731606, 0.02263015111287435, 0.022789116700490317, 0.022622132301330568, 0.02276508410771688, 0.022437651952107746, 0.02247133255004883, 0.02264465093612671, 0.022394649187723794, 0.022903800010681152, 0.022776548067728677, 0.022390317916870118, 0.022734034061431884, 0.02269911766052246, 0.022829782962799073, 0.022489134470621744, 0.022890667120615642, 0.02278980016708374, 0.02266761859258016, 0.022522981961568198, 0.022787729899088543, 0.023052767912546793, 0.022529534498850503, 0.023005632559458415, 0.022769236564636232, 0.022872082392374673, 0.02239151398340861, 0.02256553570429484, 0.023292330900828044, 0.022331682840983073, 0.02267693281173706, 0.02270056406656901, 0.022643764813741047, 0.022656647364298503, 0.022522385915120444, 0.02286127010981242, 0.0228145162264506, 0.022449000676472982, 0.02286301851272583, 0.022560632228851317, 0.022393083572387694, 0.022763033707936604, 0.022769753138224283, 0.02254565159479777, 0.02252203623453776, 0.02273658514022827, 0.022718151410420735, 0.022342034180959067, 0.022586266199747723, 0.02261753479639689, 0.02272688547770182, 0.02246685028076172, 0.022973382472991945, 0.02247855265935262, 0.022584986686706544, 0.023144916693369547, 0.023055466016133626, 0.022531465689341227, 0.022769383589426675, 0.02261193593343099, 0.02292981545130412, 0.022436134020487466, 0.022659830252329507, 0.022557314236958823, 0.022444852193196616, 0.022254268328348797, 0.02278596560160319, 0.022331082820892335, 0.022636584440867105, 0.022755348682403566, 0.02291365067164103, 0.022697349389394123, 0.022493183612823486, 0.02260291576385498, 0.02228995164235433, 0.022500948111216227, 0.022534513473510744, 0.022372464338938396, 0.02237570285797119, 0.022704434394836426, 0.022762397925059002, 0.022528517246246337, 0.022543203830718995, 0.022440699736277263, 0.022359947363535564, 0.022471129894256592, 0.022397716840108235, 0.02254116932551066, 0.02286108334859212, 0.02274723450342814, 0.0224893848101298, 0.02257991631825765, 0.02249528169631958, 0.02314279874165853, 0.022662949562072755, 0.022855150699615478, 0.022429184118906657, 0.02233673334121704, 0.022643935680389405, 0.02224883238474528, 0.022706985473632812, 0.022576681772867837, 0.022578251361846925, 0.02237834930419922, 0.022849313418070474, 0.022918498516082762, 0.022320365905761717, 0.022444101174672444, 0.0224856972694397, 0.02256234884262085, 0.022620117664337157, 0.022511418660481772, 0.022672883669535317, 0.022645417849222818, 0.02244438330332438, 0.02228301763534546, 0.022308266162872313, 0.022509348392486573, 0.02283476988474528, 0.022743519147237143, 0.02253406842549642, 0.0224031130472819, 0.02220764954884847, 0.02264500061670939, 0.02286285161972046]} -------------------------------------------------------------------------------- /results/speedup/Reacher-v1-4: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.004702881971995036, 0.09271716674168905, 
0.09254945119222005, 0.09283864895502726, 0.09294963280359904, 0.09280315240224203, 0.09258743127187093, 0.09264106750488281, 0.09275381962458293, 0.09274619817733765, 0.09249578714370728, 0.0926649808883667, 0.0928910493850708, 0.09274406830469767, 0.09271665016810099, 0.09282493193944295, 0.09268580277760824, 0.09263333479563395, 0.09269991318384806, 0.09266923268636068, 0.09257154862085978, 0.09284536441167196, 0.09294468561808268, 0.09267944892247518, 0.09265535275141398, 0.09274848302205403, 0.09237391551335652, 0.09283470312754313, 0.09269390106201172, 0.0927316149075826, 0.09246011575063069, 0.09262051582336425, 0.09252366622289022, 0.09264146486918132, 0.09292058149973552, 0.09266070127487183, 0.09287826617558798, 0.09258438348770141, 0.09293923377990723, 0.09279101689656576, 0.09264848232269288, 0.09274998108545938, 0.0925426999727885, 0.09264048337936401, 0.09276201725006103, 0.09265289704004924, 0.09272653261820475, 0.09290448427200318, 0.0926399827003479, 0.09294325113296509, 0.09262518485387167, 0.09266669750213623, 0.09276305039723715, 0.09276358286539714, 0.09263466596603394, 0.09280256430308025, 0.09269116719563802, 0.09266903400421142, 0.09264573256174723, 0.09276991685231527, 0.09282243649164836, 0.09259503285090129, 0.09252896706263224, 0.09268016815185547, 0.09266984860102336, 0.09254246552785238, 0.09267359972000122, 0.09266343514124552, 0.09255393346150716, 0.09232229789098104, 0.09248056809107462, 0.0925567348798116, 0.09259084860483806, 0.09273059765497843, 0.0926037351290385, 0.09253770112991333, 0.09246171315511068, 0.0927289326985677, 0.09265751441319783, 0.09256481726964315, 0.09266565243403117, 0.09285430113474528, 0.0927773356437683, 0.0928481658299764, 0.09283336400985717, 0.09257924954096476, 0.09283633232116699, 0.09262258211771647, 0.09291796684265137, 0.09284841616948446, 0.09298636515935262, 0.09277559916178385, 0.09260087013244629, 0.09283060232798258, 0.09280448357264201, 0.09260921478271485, 0.09286566575368245, 0.09263148307800292, 0.09289708137512206, 0.09275104999542236, 0.09270054896672567, 0.09283301830291749, 0.09249009688695271, 0.09281658331553141, 0.09267364740371704, 0.09282424847284952, 0.0928779681523641, 0.09259851773579915, 0.09271058638890585, 0.09259039958318074, 0.0928551157315572, 0.09279806613922119, 0.09270319938659669, 0.09284768501917522, 0.09266606569290162, 0.0928654154141744, 0.09292331536610922, 0.09283790191014608, 0.09280351797739665, 0.09265294869740805, 0.09290493329366048, 0.09271249771118165, 0.0926129142443339, 0.0929727832476298, 0.0926305333773295, 0.09278251727422078, 0.09277613162994384, 0.09265578190485636, 0.0926965355873108, 0.09272075096766154, 0.0928268829981486, 0.09262892007827758, 0.09271406730016073, 0.09266579945882161, 0.09322126706441243, 0.0927849014600118, 0.0927143136660258, 0.09286248286565145, 0.09289318323135376, 0.09288820028305053, 0.09275208314259847, 0.0929047147432963, 0.09286056756973267, 0.0928161342938741, 0.09300639629364013, 0.09273558457692464, 0.09283529917399089, 0.09273840188980102, 0.09295171896616618, 0.09282783269882203, 0.09269749720891317, 0.09259131749471029, 0.09274225234985352, 0.0929786483446757, 0.09278108278910319, 0.09276711543401082, 0.0926949659983317, 0.09278413454691568, 0.0930374026298523, 0.09281040032704671, 0.09295264879862468, 0.09304829835891723, 0.0927906354268392, 0.09258840084075928, 0.09273465077082316, 0.09293913046518962, 0.09276791413625081, 0.0931713342666626, 0.09293578465779623, 0.09295861721038819, 0.09275099833806356, 0.09275036652882894, 
0.09262179931004842, 0.09264026880264283, 0.09272963205973307, 0.09287818272908528, 0.09270488421122233, 0.09308029810587565, 0.09305046399434408, 0.09264501730600992, 0.09260228474934896, 0.09301581780115763, 0.09307533502578735, 0.09294148286183675, 0.09275246858596801, 0.09301374753316244, 0.09278259674708049, 0.09286693334579468, 0.09278293450673421, 0.0925223191579183, 0.09264416694641113, 0.09296199878056845, 0.09267723560333252, 0.09274496634801228, 0.09288881619771322, 0.09282244841257731, 0.09285204807917277, 0.09292121728261311, 0.09287803570429484, 0.09283893505732219, 0.09272846778233847], "mean_reward": [-121.24381494723124, -107.3255345838964, -104.02054340114478, -100.86853755940754, -94.816571139926808, -91.933055835146774, -89.000622185429435, -86.407084087600495, -83.297759017873034, -81.093077550277755, -76.681009345589501, -73.490099887464666, -71.578220145062375, -68.816967467419531, -68.429355581074603, -64.521536389390306, -63.005645479243171, -60.573526777822252, -57.799870401626286, -56.03008569829916, -55.191294142682658, -53.352087150939774, -51.4958635202415, -50.404218794178455, -47.958925468047887, -46.568888150549135, -45.293094760373698, -43.570452372737201, -42.150352678905712, -40.97761539632895, -39.85103191863292, -39.067962273088398, -37.792950936065992, -36.411322223732938, -35.471183856331457, -33.830668340420701, -33.281870442296494, -32.657188475033699, -31.26489492941425, -30.155828410577541, -29.194593567898902, -28.300451963223537, -27.857490320827257, -26.657596553462771, -26.280364450343367, -24.776948956037867, -24.155419549908036, -24.157623757027849, -23.513958025850105, -23.388194257972515, -22.146030529278455, -21.639079408780859, -21.036721766663369, -20.52741138203821, -19.973055336520904, -19.360984139832418, -18.986587696892979, -18.268964587396315, -17.815274787080881, -17.492750133455058, -17.312647790984595, -16.663327609355189, -16.269343961793606, -15.926973967989554, -15.248185720107235, -15.130275689103627, -14.441393377418267, -14.608477410894704, -13.766165092981524, -13.636057548392932, -13.475156983460437, -12.857065068016418, -12.754230004958687, -12.461584367773822, -11.927852981508318, -11.952875968564459, -11.466032305837064, -11.104271190095401, -10.987129479547111, -10.639805778530487, -10.441728486814059, -10.275883043381635, -10.252633386004286, -9.9493342150659867, -9.8075177045692516, -9.5885067675513138, -9.1719903248586334, -9.0479658823314111, -8.9312833980426127, -8.9975468186248868, -8.7003488210997908, -8.6476490130399899, -8.1874184289028449, -8.3304021662986454, -8.1667682493862852, -7.8760973838578563, -7.6160128494997776, -7.5719951257913962, -7.3797723757048361, -7.3293566388682914, -7.4295283059544568, -7.2418009812865964, -7.1094305671654565, -6.9553976908320978, -6.920366506507901, -6.7042589113537572, -6.7531766206567925, -6.5205090684991225, -6.5165356460141322, -6.2099086690173335, -6.2912073423438564, -6.2128035886778639, -5.9673169558231693, -6.1737012715957995, -5.9832303259426842, -5.9813197224882062, -5.8281913688318117, -5.688934151960634, -5.6429234725293282, -5.6545463083632512, -5.6344492241357447, -5.4238322401972479, -5.6128839584867398, -5.4400312911083413, -5.6891637801405865, -5.5193321848150534, -5.3111537841070566, -5.2048653111674215, -5.2764512634579503, -5.1469814912962564, -5.3726042680226778, -5.2637960217513351, -5.0715081192859648, -5.0985538649053934, -5.1324625087921758, -5.1597911581257776, -5.2435622177759535, -5.0968289338140282, -5.0751921864409644, -5.1319068095064555, 
-4.9763661883548318, -4.8973966749475046, -4.909652732723214, -4.9967605387947058, -4.8680139565465712, -4.7213633177540038, -4.7868177791944539, -4.7339124778984454, -4.8414862294701146, -4.9348970754485189, -4.984963971761629, -4.9881284448889813, -4.8934530235539206, -4.7454382211669737, -4.9412758053979129, -4.8239979015072754, -4.6752157273924775, -4.7116306726646062, -4.6864400615579829, -4.6794893672569238, -4.6895534253244175, -4.7272436546048322, -4.7526783907322274, -4.7057735910753875, -4.7323484319080134, -4.6628176061910001, -4.6299934888876386, -4.5150149376326887, -4.6274134492524093, -4.6942724847196748, -4.6719694019432101, -4.6367484907148677, -4.7799489072704668, -4.713089906334111, -4.4232971885723602, -4.6509409956685106, -4.6801443345906284, -4.4896102552713897, -4.4807952847882566, -4.5669348212395322, -4.5958988673720365, -4.5958817816877442, -4.6667494066638646, -4.4758714320464659, -4.515605242241219, -4.6219634013593183, -4.4590089342968948, -4.6275543799377328, -4.6832534066051554, -4.559197932022041, -4.4711315797117548, -4.6135105802591951, -4.5053863728919987, -4.3658710690595397, -4.4897697830021599, -4.5658122022728458, -4.5102040223272937, -4.4992965870193204, -4.4241788074809323, -4.3973147828042798, -4.48147376718246], "learn_time": [0.0036674698193868, 0.023523298899332683, 0.02336225112279256, 0.02350625197092692, 0.02339251438776652, 0.023453497886657716, 0.023464500904083252, 0.025165780385335287, 0.023017152150472005, 0.023564549287160237, 0.02329361836115519, 0.02320210138956706, 0.02341678539911906, 0.023332250118255616, 0.02314631938934326, 0.023297699292500813, 0.02334821621576945, 0.023447068532307942, 0.02311871846516927, 0.02361531654993693, 0.023863550027211508, 0.023507583141326904, 0.02321985165278117, 0.023015948136647542, 0.02346878449122111, 0.023203186194101968, 0.023558266957600913, 0.02286134958267212, 0.023408917586008708, 0.0232691486676534, 0.023644936084747315, 0.023757266998291015, 0.023599799474080405, 0.023630583286285402, 0.02348933219909668, 0.023547601699829102, 0.023456931114196777, 0.02359464963277181, 0.023460968335469564, 0.02320093313852946, 0.023676613966623943, 0.02373186747233073, 0.023263430595397948, 0.023537365595499675, 0.02351311445236206, 0.023280266920725504, 0.023140517870585124, 0.02375195026397705, 0.02369372049967448, 0.02350026766459147, 0.023058231671651205, 0.023316550254821777, 0.023419280846913654, 0.023399619261423747, 0.023032315572102866, 0.0230636994043986, 0.023481333255767824, 0.02388533353805542, 0.023697499434153238, 0.024035350481669108, 0.02368938128153483, 0.0235824187596639, 0.023454535007476806, 0.02342961629231771, 0.023843316237131755, 0.023309850692749025, 0.02362501621246338, 0.02355608542760213, 0.023992466926574706, 0.02352664868036906, 0.023465534051259358, 0.02375980218251546, 0.02387396494547526, 0.02352296511332194, 0.02358668247858683, 0.02375641663869222, 0.023144551118214927, 0.023744599024454752, 0.02357151508331299, 0.023762198289235432, 0.02355500062306722, 0.023736584186553954, 0.023260851701100666, 0.023511834939320884, 0.02381300131479899, 0.023838833967844645, 0.02307971715927124, 0.023477598031361898, 0.023515299956003825, 0.023918553193410238, 0.023804914951324464, 0.0240395983060201, 0.023657735188802084, 0.0239738663037618, 0.023925483226776123, 0.02395780086517334, 0.02382524808247884, 0.02408441702524821, 0.023469301064809163, 0.023863780498504638, 0.024103148778279623, 0.02387334903081258, 0.02392651637395223, 0.023950918515523275, 0.023909334341684976, 
0.02384251356124878, 0.024120616912841796, 0.023881681760152183, 0.023727317651112873, 0.024413148562113445, 0.0240956981976827, 0.024034484227498373, 0.024128933747609455, 0.024523401260375978, 0.023990714550018312, 0.023581031958262125, 0.023838897546132404, 0.024315130710601807, 0.02371323506037394, 0.024084715048472087, 0.024106283982594807, 0.023533515135447183, 0.023929381370544435, 0.0241209348042806, 0.02441553274790446, 0.024014981587727864, 0.02354463338851929, 0.024345465501149497, 0.02411705255508423, 0.02384571631749471, 0.02418737014134725, 0.023578151067097982, 0.024188685417175292, 0.024333115418752035, 0.024452598889668782, 0.023472718397776284, 0.024243398507436117, 0.023955349127451578, 0.02435491482416789, 0.024250332514444986, 0.024043532212575276, 0.023807501792907713, 0.023857982953389485, 0.02440556287765503, 0.024164716402689617, 0.02392743428548177, 0.02428911526997884, 0.024231966336568198, 0.02419824997584025, 0.02347596486409505, 0.02417060136795044, 0.02433393398920695, 0.02401874860127767, 0.023872101306915285, 0.0241169532140096, 0.023825601736704508, 0.023748048146565757, 0.02455058495203654, 0.024203431606292725, 0.02404543161392212, 0.023828200499216714, 0.024147983392079672, 0.024114219347635905, 0.02418051560719808, 0.024726951122283937, 0.024669965108235676, 0.024143834908803303, 0.024101114273071288, 0.024098567167917886, 0.02390660047531128, 0.024271297454833984, 0.024409568309783934, 0.024114501476287842, 0.023886700471242268, 0.023960248629252116, 0.02457145055135091, 0.02432485024134318, 0.02390009959538778, 0.024051447709401447, 0.023696168263753255, 0.024471016724904378, 0.024507447083791097, 0.024181648095448812, 0.024580701192220052, 0.023962120215098064, 0.024187866846720377, 0.024168884754180907, 0.024430580933888755, 0.023868370056152343, 0.023903950055440267, 0.02421753406524658, 0.024164549509684243, 0.024365834395090737, 0.024432281653086346, 0.024195082982381187, 0.024398767948150636, 0.02405623197555542, 0.024056565761566163, 0.02418740193049113, 0.02444233496983846, 0.024207150936126708]} -------------------------------------------------------------------------------- /results/speedup/Reacher-v1-7: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.004590717951456705, 0.08647209803263346, 0.08670106728871664, 0.08609223365783691, 0.08629038333892822, 0.0860451340675354, 0.08617510000864664, 0.08653858502705893, 0.08610313336054484, 0.08607118527094523, 0.08607353369394938, 0.08595685164133708, 0.08615425030390421, 0.08600440025329589, 0.08604991833368937, 0.08612800041834513, 0.08600105047225952, 0.08613980213801066, 0.0861959973971049, 0.08603983322779338, 0.08619430065155029, 0.08602791627248128, 0.08616833289464315, 0.08626436789830526, 0.08606664737065634, 0.08595886627833048, 0.08606751759847005, 0.08610069751739502, 0.08608308235804239, 0.08617785374323526, 0.0861737330754598, 0.08590419689814249, 0.08602948188781738, 0.08614928325017293, 0.08603196541468303, 0.0860931674639384, 0.0859803835550944, 0.08612691561381022, 0.08607010046641032, 0.08599191904067993, 0.08588091929753622, 0.08615511655807495, 0.08615838289260865, 0.08585086663564047, 0.08613856633504231, 0.08608916600545248, 0.0862950841585795, 0.0860249161720276, 0.08602511882781982, 0.0859905481338501, 0.08613491455713908, 0.08606918255488077, 0.08596816460291544, 0.08613870143890381, 0.08605999946594238, 0.08605028390884399, 0.08597176472345988, 0.08609944979349772, 0.0860278844833374, 0.08593443632125855, 
0.08604628245035807, 0.08617036739985148, 0.08614999850591024, 0.08601928154627482, 0.08606544733047486, 0.08596068223317464, 0.08601121505101522, 0.08594841559727986, 0.0859703818957011, 0.0860409140586853, 0.08587323029836019, 0.08590136766433716, 0.08610972960789999, 0.08589603503545125, 0.08592338562011718, 0.08594308296839397, 0.0859938661257426, 0.08618005116780598, 0.0859527349472046, 0.08597204685211182, 0.08601563374201457, 0.08589589993158976, 0.08606683015823365, 0.08619096676508585, 0.0860358993212382, 0.08593271573384603, 0.08614116907119751, 0.08597276608149211, 0.08579649925231933, 0.08588414986928304, 0.08592918316523233, 0.08592704931894939, 0.0859839677810669, 0.08598436911900838, 0.08593328396479288, 0.08598418633143107, 0.08600038290023804, 0.08642468452453614, 0.0859106699625651, 0.08577423095703125, 0.08607271512349447, 0.08591763178507487, 0.08594428300857544, 0.08608378171920776, 0.08595658540725708, 0.08598806460698445, 0.08603033622105917, 0.08593761920928955, 0.08604986667633056, 0.0859140157699585, 0.08595875104268393, 0.08596523205439249, 0.0858386516571045, 0.0857836643854777, 0.08601679801940917, 0.08591813246409098, 0.08605571587880452, 0.08603473504384358, 0.08591306606928507, 0.08602550029754638, 0.08603466749191284, 0.08593130111694336, 0.0859598159790039, 0.08600511948267618, 0.08588509956995646, 0.08597026666005453, 0.08603413105010986, 0.08597388267517089, 0.08608518441518148, 0.08608906666437785, 0.0858769178390503, 0.08595504760742187, 0.08591759999593099, 0.08595589796702068, 0.08607736825942994, 0.08598578373591105, 0.08599486748377481, 0.08599836826324463, 0.085975448290507, 0.08603631655375163, 0.08604164918263753, 0.08604689836502075, 0.08610504865646362, 0.0858812689781189, 0.08587689797083536, 0.0860968828201294, 0.08615786631902059, 0.08599448204040527, 0.08597801526387533, 0.08586446444193523, 0.08611076672871908, 0.08603686889012654, 0.08602836529413858, 0.0859042485555013, 0.08590073188145955, 0.08605171839396158, 0.08607518275578817, 0.08601531585057577, 0.08609453439712525, 0.08598191738128662, 0.08594003121058146, 0.08587126731872559, 0.08592136700948079, 0.08600273132324218, 0.08602203130722046, 0.08613210121790568, 0.08594971497853597, 0.08604005177815756, 0.08606903553009033, 0.08600285053253173, 0.08603923320770264, 0.0860131859779358, 0.08599561850229899, 0.08591553370157877, 0.08611559867858887, 0.08593710263570149, 0.08616678317387899, 0.08589556614557901, 0.08595171769460043, 0.08601818482081096, 0.08600948651631674, 0.08616526921590169, 0.0861620306968689, 0.08613158464431762, 0.08606534798940023, 0.08610544999440511, 0.08594428300857544, 0.08640388250350953, 0.08592265049616496, 0.08612373669942221, 0.08606136639912923, 0.08604886531829833, 0.08611398537953695, 0.08601428270339966, 0.08612598180770874, 0.08602935075759888, 0.08604931433995565, 0.08628209829330444, 0.08603036403656006, 0.08618418375651042, 0.08605513175328573], "mean_reward": [-101.31027219438734, -104.02342469807941, -100.79410031685684, -97.975909716387548, -94.316555658154371, -91.309372331144701, -86.288570046569589, -85.424852753277634, -83.776067916217201, -78.3108157463151, -74.505057767128946, -74.415203567211506, -69.689564883338392, -68.269524417415269, -65.61457952768076, -63.222352575892138, -62.623506472847595, -60.988334070664543, -58.027108022632305, -54.858635841994847, -53.75164856715103, -52.977530082551482, -50.865127319166753, -49.30906957544407, -47.900579444406787, -46.127684127740338, -44.54581358988078, -43.208317865503759, 
-42.094241296870713, -40.668105054898128, -39.884414157666022, -38.098149370409942, -37.014901907016963, -35.694633634966216, -34.666093607512245, -33.450254112670059, -32.579338808914535, -31.713037799757625, -30.871203045980934, -29.773376248372788, -29.581025013031716, -28.609220609693182, -27.528344503826453, -26.373231769794209, -25.856290982706167, -25.404727371850409, -24.842914094717759, -23.845762970000969, -23.39226034450639, -22.265230537476882, -21.999572153716628, -21.385875406372175, -20.947704694563829, -20.256784521675293, -19.55892089116476, -19.009104082690072, -18.914212696241908, -17.776001595494392, -17.700581585622487, -17.166777009576467, -16.829140259073792, -16.31215167363488, -16.140725215630837, -15.493030956931971, -14.98930617539323, -14.634062438689897, -14.628416275887117, -13.990388473631453, -13.80389863651992, -13.436908243486782, -13.197509325194414, -12.498314042551819, -12.569053864047442, -12.259255712746802, -11.97317680948094, -11.499621769822447, -11.40219544105482, -11.086134495657324, -10.970117819957233, -10.580568956950806, -10.408112082498862, -10.074451307666981, -9.9115835493852469, -9.689834760351161, -9.4018400376751607, -9.27746929903069, -9.2279712275241117, -9.2035439140487014, -8.6973584143742304, -8.7074031315623053, -8.5521517488850698, -8.2082496713204343, -8.1420084270969504, -8.26145916409906, -7.9920025348323076, -7.8924019876955143, -7.6693989499047017, -7.5554905283977707, -7.395759952611793, -7.2466186991985762, -7.2411709370800414, -7.1842201850737766, -6.9922947507119284, -6.7699375396159276, -6.8110668658768319, -6.6733797059055249, -6.5728958852839972, -6.5116093498460614, -6.550277016829642, -6.4725118776435266, -6.4204884768215127, -6.396290668465574, -6.4171780162530441, -6.1246634623591945, -6.2250550434191814, -6.1321710442810158, -6.0625793613912426, -6.0274077005642317, -6.0356488347171515, -5.8716869690659701, -5.8335659489034608, -5.6055270241399002, -5.5929149100419275, -5.7301048818008207, -5.5496435785159113, -5.7739662198735715, -5.5197298144925231, -5.471508835050602, -5.4135020447335531, -5.4872359252470053, -5.5355510136304069, -5.3629039112379431, -5.407405009614215, -5.4392595944270647, -5.1089068673549578, -5.3917100812163081, -5.365526198730632, -5.3529181941360093, -5.3044834620127137, -5.2869546707704087, -5.219432808401625, -5.2380756102006201, -5.2636011395189577, -5.0636950344408724, -5.0057608482462648, -5.1159416780185056, -5.0031730052840473, -4.9846099908240138, -5.069362011304456, -4.9836189999134763, -5.1929988604556154, -5.0102957018255339, -4.9035055037209947, -5.0860050195308961, -5.0423270378714244, -5.017374054265944, -4.8600619579549571, -4.9118770025122691, -5.0532872781464127, -4.9452124967246052, -5.0177983519917806, -5.0188284336675277, -4.8955162998276425, -5.0722977287760278, -4.8735672112674777, -5.0768789750770349, -5.0619328769731435, -4.9110913305168511, -4.953820195320966, -4.92243346675723, -4.8351712089502525, -4.9265889178471483, -4.7431241303621494, -4.8706330097675465, -5.0464169950156696, -4.7621532968135414, -4.9407190152850839, -4.7819885231442276, -4.8672277844556042, -4.7540238528965304, -4.5973977598375741, -4.8669823735702584, -4.7601328510939647, -4.679575747063196, -4.7226496721630538, -4.8108613796760098, -4.7405154997562224, -4.7571116716122956, -4.6766225948619962, -4.6834766233324983, -4.7284296062410531, -4.7494683470190351, -4.4412552331760313, -4.6944921132565662, -4.4650616241071148, -4.7200179135234341, -4.73621292728083, -4.7808486500556224, 
-4.7017746430320271, -4.5785236496670789, -4.606508716720322], "learn_time": [0.0037189483642578124, 0.024146501223246256, 0.02393406629562378, 0.023763068517049155, 0.02346030076344808, 0.023491116364796956, 0.02314586639404297, 0.02481660048166911, 0.02356963555018107, 0.023627734184265135, 0.023550148804982504, 0.023288432757059732, 0.0234199325243632, 0.022916885217030843, 0.023534985383351643, 0.023441533247629803, 0.023383816083272297, 0.023021451632181802, 0.02372955083847046, 0.023811733722686766, 0.023790983359018962, 0.023635868231455484, 0.023356350262959798, 0.022996981938680012, 0.02302889823913574, 0.023404451211293538, 0.02347400188446045, 0.023659451802571615, 0.023601917425791423, 0.023414317766825357, 0.02348178227742513, 0.022902047634124754, 0.02368101676305135, 0.02344701687494914, 0.023337451616923015, 0.02297983169555664, 0.023375562826792397, 0.023435099919637045, 0.023616846402486166, 0.023247718811035156, 0.02371616760889689, 0.02330126762390137, 0.023619067668914796, 0.023516801993052165, 0.023574002583821616, 0.023435668150583903, 0.023097149531046548, 0.023086798191070557, 0.023456700642903647, 0.02342766523361206, 0.023504896958669027, 0.023157350222269692, 0.023441600799560546, 0.02356390158335368, 0.023650983969370525, 0.022488447030385335, 0.02361085017522176, 0.023382234573364257, 0.023491152127583823, 0.023647801081339518, 0.023485382397969563, 0.02349040110905965, 0.023619266351064046, 0.023844146728515626, 0.023880648612976074, 0.023604365189870198, 0.023337916533152262, 0.023272732893625896, 0.023448050022125244, 0.023688097794850666, 0.02351688543955485, 0.023362664381663005, 0.02354903221130371, 0.02356581687927246, 0.023548285166422527, 0.023397429784138998, 0.02294170061747233, 0.023327696323394775, 0.02337324619293213, 0.023006014029184976, 0.02362823486328125, 0.02328191598256429, 0.02358321746190389, 0.02365691661834717, 0.02353431781133016, 0.02363581657409668, 0.023463284969329833, 0.023582148551940917, 0.023391799132029215, 0.02347481648127238, 0.022958783308664958, 0.023264749844868978, 0.023675282796223957, 0.023182785511016844, 0.022850648562113444, 0.02324440081914266, 0.023539602756500244, 0.023569798469543456, 0.023321501413981118, 0.0231682817141215, 0.023100602626800536, 0.02300738493601481, 0.023424947261810304, 0.023304184277852375, 0.023420882225036622, 0.023211097717285155, 0.0236700177192688, 0.023580034573872883, 0.02300496498743693, 0.02334820032119751, 0.023415247599283855, 0.02280104955037435, 0.023478098710378013, 0.023067601521809897, 0.023517966270446777, 0.02304088274637858, 0.023511230945587158, 0.023823551336924233, 0.023264630635579427, 0.023807247479756672, 0.023261698087056477, 0.023401530583699544, 0.0238014817237854, 0.023405134677886963, 0.02322280009587606, 0.022952632109324137, 0.02340866724650065, 0.023686333497365316, 0.0234106183052063, 0.023524149258931478, 0.023366502920786538, 0.024301564693450926, 0.02337923049926758, 0.023115766048431397, 0.02323609987894694, 0.023676435152689617, 0.023318950335184732, 0.023559832572937013, 0.0231059988339742, 0.023589050769805907, 0.02377709945042928, 0.023804330825805665, 0.02354596455891927, 0.023185602823893228, 0.023026450475056966, 0.023434603214263917, 0.023059884707132976, 0.023303782939910887, 0.023286497592926024, 0.023151103655497232, 0.023481635252634685, 0.023488767941792808, 0.02340521812438965, 0.023422551155090333, 0.023358933130900063, 0.023550717035929362, 0.02355428139368693, 0.0230787992477417, 0.022924319903055827, 0.023111633459726968, 
0.023284482955932616, 0.023182698090871177, 0.023777568340301515, 0.02295693556467692, 0.023334630330403647, 0.023928483327229817, 0.023446416854858397, 0.02377188205718994, 0.023377601305643717, 0.02443416913350423, 0.023548650741577148, 0.022895367940266927, 0.023113083839416505, 0.023675715923309325, 0.023274115721384683, 0.02355960210164388, 0.023322502772013348, 0.023709551493326823, 0.023802947998046876, 0.023617998758951823, 0.0237662672996521, 0.02366956869761149, 0.023239751656850178, 0.0234408696492513, 0.023495896657307943, 0.023856298128763834, 0.02348411480585734, 0.023539082209269205, 0.023371700445810953, 0.023800333340962727, 0.02395939826965332, 0.023590548833211263, 0.02326600154240926, 0.023528683185577392, 0.023363749186197918, 0.02364133596420288, 0.023212683200836182, 0.023438465595245362, 0.023742783069610595, 0.02361898422241211, 0.023636881510416666]} -------------------------------------------------------------------------------- /results/speedup/Reacher-v1-8: -------------------------------------------------------------------------------- 1 | {"rollout_time": [0.004615100224812826, 0.08696444829305013, 0.08676003217697144, 0.08699273268381755, 0.08695613543192546, 0.0869998296101888, 0.08709818124771118, 0.08680326541264852, 0.08696505228678385, 0.08703423341115316, 0.08688633441925049, 0.08766526778539022, 0.08704911867777507, 0.08703901767730712, 0.08717453479766846, 0.08695171674092611, 0.08685215314229329, 0.08702675104141236, 0.08695881764094035, 0.0871682325998942, 0.08688438336054484, 0.08697306712468465, 0.08691920042037964, 0.08691656589508057, 0.08713935216267904, 0.08701198498408, 0.08701511621475219, 0.08704189856847128, 0.08691004912058513, 0.08696686426798503, 0.08689096768697104, 0.08702200253804525, 0.08712513049443563, 0.08687676986058553, 0.08705941438674927, 0.08693238496780395, 0.08698530197143554, 0.0870932698249817, 0.0868062178293864, 0.08712586561838785, 0.08687250216801962, 0.08708204825719197, 0.08704646825790405, 0.08709516525268554, 0.0870677669843038, 0.0869973341623942, 0.08704484701156616, 0.08697876532872519, 0.08692854642868042, 0.0870633840560913, 0.08689250151316324, 0.08681148290634155, 0.08688438336054484, 0.0871421496073405, 0.086851700146993, 0.08700194756189981, 0.08689839839935302, 0.0868887186050415, 0.08688560326894125, 0.0869825839996338, 0.08701476653416952, 0.08689909776051839, 0.08687408367792765, 0.08697009881337484, 0.08679043451944987, 0.08691388368606567, 0.0868902325630188, 0.0867538849512736, 0.08692556619644165, 0.08683448632558187, 0.08680588006973267, 0.08704398473103842, 0.08698391517003377, 0.0869903326034546, 0.08690776427586873, 0.08679861625035604, 0.08697848320007324, 0.0869151512781779, 0.08676429986953735, 0.0868699828783671, 0.08691568374633789, 0.08691376845041911, 0.0870081345240275, 0.08687703212102255, 0.0868010679880778, 0.08683366775512695, 0.08697704871495565, 0.08695983489354452, 0.08685410022735596, 0.08688774903615316, 0.08695093393325806, 0.08690465291341146, 0.08681644996007283, 0.08692719936370849, 0.08700294891993204, 0.0868153174718221, 0.08698016802469889, 0.0866903821627299, 0.08699021736780803, 0.08718168338139852, 0.08680458466211954, 0.08700174887975057, 0.08676108519236246, 0.08683098554611206, 0.08691708644231161, 0.08685638507207234, 0.08675821622212727, 0.08696536620457967, 0.0868839979171753, 0.08660294612248738, 0.08697341680526734, 0.08674499988555909, 0.08665539820988973, 0.08680968284606934, 0.08666266600290934, 0.08678258260091146, 0.08696550130844116, 
0.0868806004524231, 0.0868807315826416, 0.08680091698964437, 0.0867859164873759, 0.0867641011873881, 0.08678003152211507, 0.08667383193969727, 0.086790398756663, 0.08688464959462484, 0.08662981589635213, 0.0869781494140625, 0.08691448370615641, 0.08672918081283569, 0.08671866655349732, 0.08705996672312419, 0.08695056438446044, 0.08669241666793823, 0.08681035041809082, 0.0868286649386088, 0.0866834322611491, 0.08676978349685668, 0.08685656785964965, 0.08669018348058065, 0.0866135835647583, 0.08658558130264282, 0.0869386355082194, 0.08664373556772868, 0.08682043552398681, 0.08675738175710042, 0.08676731586456299, 0.08679003318150838, 0.0866242527961731, 0.08666631778081259, 0.08669738372166952, 0.08694090048472086, 0.08661979834238688, 0.08672586679458619, 0.08690676291783651, 0.08660200039545694, 0.08677396774291993, 0.08668200174967448, 0.08667253255844116, 0.08659800291061401, 0.08671338558197021, 0.08686336676279703, 0.08678313096364339, 0.0866164485613505, 0.08699926535288492, 0.08665201663970948, 0.08657193183898926, 0.0865647832552592, 0.08663591543833414, 0.08643354972203572, 0.08675201733907063, 0.08669151465098063, 0.08663538297017416, 0.08647165298461915, 0.08660200039545694, 0.08669723272323608, 0.08666075070699056, 0.08653898239135742, 0.08660190105438233, 0.0866249163945516, 0.08667396704355876, 0.0866686979929606, 0.08652753432591756, 0.08667486906051636, 0.08659291664759318, 0.0866629679997762, 0.08682873249053955, 0.08666786750157675, 0.08696784973144531, 0.08632910251617432, 0.08651169935862223, 0.08641683260599772, 0.08662844896316528, 0.08639561732610067, 0.08662398258845011, 0.08678404887517294, 0.0865578015645345, 0.08663218418757121, 0.08641811609268188, 0.08653769890467326, 0.08651896715164184], "mean_reward": [-115.53584396305943, -110.07085614180629, -106.24557451127872, -100.19070449933336, -99.219870343034216, -95.284637143516278, -91.225511926104602, -88.919015412403056, -85.29098791588541, -82.344289312313634, -80.31672420748707, -76.462097418944566, -73.565552496364106, -69.983279917289678, -68.950661140373839, -67.275302623499627, -64.606908319701603, -63.315721664846585, -60.943500407264203, -58.542305145613973, -56.589568139650957, -55.323096059980635, -53.543240810093153, -51.216283775684381, -49.841245517499303, -48.016432110840896, -46.489814118168184, -45.776208369412579, -43.673248910327324, -42.293888953271704, -41.094653181028001, -40.056100113994866, -38.961741414667273, -37.972293250367713, -36.469675701221476, -34.648420589709573, -34.056439666619241, -32.416905187431261, -31.974205350732429, -31.11759928442498, -30.585109502279899, -29.79276916729637, -28.799914369572718, -28.107952839315363, -26.939602238278283, -26.452125074221904, -25.712553142597688, -25.075739108155719, -24.28992945291375, -23.955443749462525, -23.088524968900902, -22.274076189908396, -21.906620672709554, -20.934245644216514, -20.611069341342635, -20.002791378390089, -19.799520365257568, -19.46105468015293, -18.824285972213609, -18.538650550219153, -17.838934881349331, -17.45633240318891, -16.89391182382678, -16.253407006437094, -15.464259152419913, -15.487982363568447, -14.896746543513286, -14.567937144228845, -14.126422509469528, -13.749770375194403, -13.831771923932587, -13.124429767365736, -12.902629689207842, -12.947006360327448, -12.499086417498232, -12.40263417171604, -12.174535924361189, -11.833508992277624, -11.475144158990403, -10.982701241671258, -10.864049224495682, -10.536588565299263, -10.496799329640984, -10.25149564671112, -10.037926619942064, 
-9.6624558797572924, -9.7061379434466701, -9.6267744672773325, -9.1834840050753588, -9.1013469226095758, -8.8069488835632548, -8.7485659622888434, -8.4958297258112996, -8.4841020041166804, -8.3609235424017783, -8.0794575482512005, -7.8323773909318586, -7.555790034021463, -7.8056328405830042, -7.5163012479419713, -7.4540050886803551, -7.3140016768684175, -7.2383487043087875, -7.2145774984214324, -7.0205704439247612, -7.1542341153567355, -6.8920229223414831, -6.7199054514236565, -6.7850944953240253, -6.6500499977899237, -6.4286076039034992, -6.5394120441065704, -6.4611110702996717, -6.2958816701900489, -6.2427943647283541, -6.414034324432726, -6.2543507384283998, -6.1933399054927438, -5.9547283072100985, -6.0021052118825571, -5.8631163325553848, -5.8284412561087597, -5.8702318013967565, -5.934913745138358, -5.6478927892343513, -5.734581275961709, -5.6363611944988188, -5.6736156530844397, -5.5573418940171972, -5.7108181883556757, -5.5135459831999745, -5.6805951929747485, -5.6425611205444532, -5.5258191072837208, -5.5850957974066908, -5.5545851542027416, -5.3069320504074939, -5.450868222285604, -5.5178658343347164, -5.3630821639516117, -5.4523112487026859, -5.2512217726429045, -5.2736236369431442, -5.2299425382757523, -5.3468921640089642, -5.3294072261754497, -5.1294845860564449, -5.3509985755008662, -5.2533894686560751, -5.1293982589310234, -5.1528883157123451, -5.0669468424450903, -5.0210637373985012, -5.0784160883636629, -5.1498753372544792, -5.2235227797427024, -5.1396726621034592, -5.3778448179220657, -4.9920224321131226, -5.1020281150362434, -5.1905672790933561, -4.9731589252880335, -5.1621549265428213, -5.1930982576485318, -4.9876747866945923, -4.9653122292529916, -4.9761022921316806, -5.1901724338475974, -4.9396281597021723, -5.0268433285699494, -4.9665359316501689, -5.1720734011794276, -5.0975580714352597, -4.9580451858518035, -5.0855804792754569, -5.0929912517099254, -5.0910852170647836, -4.9094760790093446, -4.9675777613858232, -5.1133933471692226, -4.8897036417289268, -5.0221071692773105, -4.9496184478003968, -5.0030191068123804, -4.8585096702484831, -4.9041926789176005, -4.8569418222447709, -5.0795583588705213, -4.9431242707668845, -4.946796820653832, -4.9156831016020899, -4.6799559214209081, -4.9466060067375706, -4.8402953897663021, -5.0243912895651901, -4.9231332680088267, -4.7409482305325126, -4.8236389189364406, -4.9242767007491608, -4.7981545382373394, -4.8657423212267004], "learn_time": [0.003625186284383138, 0.025006230672200522, 0.0246425191561381, 0.02458721399307251, 0.02472821871439616, 0.024438114960988362, 0.024676966667175292, 0.026229198773701987, 0.024262917041778565, 0.024610416094462077, 0.025015532970428467, 0.024349780877431233, 0.024993518988291424, 0.024475868542989096, 0.024368115266164145, 0.02436846892038981, 0.02465904951095581, 0.024649318059285483, 0.024258100986480714, 0.024279419581095377, 0.023987332979838055, 0.02469778060913086, 0.02403563658396403, 0.024892969926198324, 0.024641017119089764, 0.02467216650644938, 0.02500110069910685, 0.024505317211151123, 0.024424934387207033, 0.024818265438079835, 0.024743000666300457, 0.024369903405507407, 0.024000815550486245, 0.024568653106689452, 0.024718014399210612, 0.024279316266377766, 0.024635136127471924, 0.024568084875742593, 0.024363032976786294, 0.024988297621409097, 0.02465809981028239, 0.024555031458536783, 0.024452014764149984, 0.024538918336232503, 0.024657368659973145, 0.024703502655029297, 0.024538632233937582, 0.024154015382130942, 0.024948732058207194, 0.02437886397043864, 
0.024434963862101238, 0.024582135677337646, 0.024349550406138103, 0.025050199031829833, 0.024519030253092447, 0.024688684940338136, 0.024404784043629963, 0.02495416800181071, 0.02455509901046753, 0.024440451463063558, 0.024680598576863607, 0.024267518520355226, 0.024526663621266685, 0.024381514390309653, 0.02448873519897461, 0.024760051568349203, 0.02415396769841512, 0.024069686730702717, 0.02434059778849284, 0.02451974948247274, 0.024179951349894205, 0.023959835370381672, 0.024478952089945476, 0.024569050470987955, 0.024373332659403484, 0.02458953062693278, 0.024830067157745363, 0.024383350213368734, 0.024453850587209065, 0.024189301331837974, 0.024752867221832276, 0.024729986985524494, 0.02415766716003418, 0.024371135234832763, 0.024277297655741374, 0.02440969944000244, 0.02435536781946818, 0.024549619356791178, 0.024276002248128255, 0.024185335636138915, 0.02437918186187744, 0.024322768052419026, 0.024486366907755533, 0.024339632193247477, 0.024700431029001872, 0.024253451824188234, 0.024650530020395914, 0.024243601163228354, 0.024612335364023845, 0.02409093379974365, 0.02423141400019328, 0.02478244702021281, 0.024994019667307535, 0.024709816773732504, 0.024489184220631916, 0.024637949466705323, 0.02498180071512858, 0.024101618925730386, 0.024757532278696696, 0.024396049976348876, 0.024364463488260903, 0.024311363697052002, 0.024177332719167072, 0.024469252427419028, 0.024561063448588053, 0.02467648188273112, 0.024775083859761557, 0.02412445147832235, 0.024071268240610757, 0.02441005309422811, 0.02453598181406657, 0.02413865327835083, 0.02425098419189453, 0.024218181769053142, 0.024420519669850666, 0.024300567309061685, 0.024596635500590006, 0.02405446767807007, 0.024438401063283283, 0.024474783738454183, 0.024347631136576335, 0.023933903376261393, 0.024102715651194255, 0.02446753184000651, 0.024426035086313882, 0.024582846959431966, 0.02419169743855794, 0.024527935187021892, 0.024545586109161376, 0.024451748530069987, 0.02441387971242269, 0.024153733253479005, 0.02426358461380005, 0.024377667903900148, 0.02392761707305908, 0.02423634926478068, 0.02515486478805542, 0.024364999930063882, 0.02382059892018636, 0.02418174743652344, 0.023916916052500407, 0.024787163734436034, 0.024125881989796958, 0.024863314628601075, 0.024261999130249023, 0.024392449855804445, 0.024445799986521403, 0.02430280049641927, 0.024129951000213624, 0.024403985341389975, 0.024235085646311442, 0.02419635057449341, 0.02427860101064046, 0.024652298291524252, 0.02391361395517985, 0.024445883433024087, 0.024881033102671306, 0.02422536611557007, 0.02451988458633423, 0.024403814474741617, 0.02447986602783203, 0.024285415808359783, 0.024439048767089844, 0.02441184918085734, 0.024057948589324953, 0.024431033929189046, 0.024439064661661784, 0.024386600653330485, 0.024290180206298827, 0.024286933739980063, 0.02425433397293091, 0.024329400062561034, 0.024375649293263753, 0.024375633398691813, 0.02443040211995443, 0.024389982223510742, 0.024657618999481202, 0.024285399913787843, 0.024832268555959065, 0.024629882971445718, 0.0241953174273173, 0.024085366725921632, 0.02440969944000244, 0.024594247341156006, 0.02468505303064982, 0.024099632104237875, 0.024250435829162597, 0.02458596626917521, 0.02410668134689331, 0.024229081471761067, 0.024229764938354492]} --------------------------------------------------------------------------------
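Note (not part of the repository): every results file dumped above shares the same schema, a single JSON object with three parallel lists, "rollout_time", "mean_reward", and "learn_time", one entry per recorded training iteration. The sketch below shows one way such a file could be loaded and inspected; it assumes only the keys and file paths visible above, plus matplotlib, and the script itself is illustrative rather than one of the repo's plotting scripts.

# Minimal sketch: load one results file (e.g. results/try1/Reacher-v1-1 above)
# and plot its reward curve and per-iteration timing. Assumes the three keys
# visible in the dumps; matplotlib is an assumption, not a repo requirement.
import json
import matplotlib.pyplot as plt

with open("results/try1/Reacher-v1-1") as f:
    data = json.load(f)

fig, (ax_reward, ax_time) = plt.subplots(1, 2, figsize=(10, 4))

# Mean episode reward per recorded iteration.
ax_reward.plot(data["mean_reward"])
ax_reward.set_xlabel("iteration")
ax_reward.set_ylabel("mean reward")

# Wall-clock cost per iteration, split into rollout and learn phases.
ax_time.plot(data["rollout_time"], label="rollout_time")
ax_time.plot(data["learn_time"], label="learn_time")
ax_time.set_xlabel("iteration")
ax_time.set_ylabel("seconds")
ax_time.legend()

plt.tight_layout()
plt.show()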