├── GP(2).pdf ├── LICENSE ├── README.md ├── beest-for paper-t50 ├── Screenshot from 2019-05-03 20-35-37.png ├── dist0.png ├── dist1.png ├── dist2.png ├── dist3.png ├── multistep0.png ├── multistep1.png ├── multistep2.png ├── multistep3.png ├── onep0.png ├── onep1.png ├── onep2.png └── onep3.png ├── cartpole.py ├── cartpole ├── cartpole.png ├── onep0.png ├── onep1.png ├── onep2.png ├── onep3.png ├── onep4.png ├── onep5.png └── onep6.png ├── control ef ├── 3unresponisve- t20,n8 │ ├── dist0.png │ ├── dist1.png │ ├── dist2.png │ ├── dist3.png │ ├── dist4.png │ ├── dist5.png │ ├── dist6.png │ ├── dist7.png │ ├── multistep0.png │ ├── multistep1.png │ ├── multistep2.png │ ├── multistep3.png │ ├── multistep4.png │ ├── multistep5.png │ ├── multistep6.png │ ├── multistep7.png │ ├── onep0.png │ ├── onep1.png │ ├── onep2.png │ ├── onep3.png │ ├── onep4.png │ ├── onep5.png │ ├── onep6.png │ └── onep7.png ├── 3unresponsive-t50,n8 │ ├── Screenshot from 2019-05-03 13-47-27.png │ ├── dist0.png │ ├── dist1.png │ ├── dist2.png │ ├── dist3.png │ ├── dist4.png │ ├── dist5.png │ ├── dist6.png │ ├── dist7.png │ ├── multistep0.png │ ├── multistep1.png │ ├── multistep2.png │ ├── multistep3.png │ ├── multistep4.png │ ├── multistep5.png │ ├── multistep6.png │ ├── multistep7.png │ ├── onep0.png │ ├── onep1.png │ ├── onep2.png │ ├── onep3.png │ ├── onep4.png │ ├── onep5.png │ ├── onep6.png │ └── onep7.png ├── test1 │ ├── dist0.png │ ├── dist1.png │ ├── dist2.png │ ├── dist3.png │ └── dist4.png ├── test2 │ ├── dist0.png │ ├── dist1.png │ ├── dist2.png │ ├── dist3.png │ ├── multistep0.png │ ├── multistep1.png │ ├── multistep2.png │ ├── multistep3.png │ ├── onep0.png │ ├── onep1.png │ ├── onep2.png │ └── onep3.png ├── test3-fail │ ├── dist0.png │ ├── dist1.png │ ├── dist2.png │ ├── dist3.png │ ├── dist4.png │ ├── dist5.png │ ├── dist6.png │ ├── dist7.png │ ├── multistep0.png │ ├── multistep1.png │ ├── multistep2.png │ ├── multistep3.png │ ├── multistep4.png │ ├── multistep5.png │ ├── 
multistep6.png │ ├── onep0.png │ ├── onep1.png │ ├── onep2.png │ ├── onep3.png │ ├── onep4.png │ ├── onep5.png │ ├── onep6.png │ └── onep7.png ├── test4-T50,bf50, best - without time │ ├── dist0.png │ ├── dist1.png │ ├── dist2.png │ ├── dist3.png │ ├── dist4.png │ ├── dist5.png │ ├── dist6.png │ ├── dist7.png │ ├── multistep0.png │ ├── multistep1.png │ ├── multistep2.png │ ├── multistep3.png │ ├── multistep4.png │ ├── multistep5.png │ ├── multistep6.png │ ├── multistep7.png │ ├── onep0.png │ ├── onep1.png │ ├── onep2.png │ ├── onep3.png │ ├── onep4.png │ ├── onep5.png │ ├── onep6.png │ └── onep7.png ├── test5-T50,bf50 │ ├── Screenshot from 2019-05-01 19-31-45.png │ ├── dist0.png │ ├── dist1.png │ ├── dist2.png │ ├── dist3.png │ ├── multistep0.png │ ├── multistep1.png │ ├── multistep2.png │ ├── multistep3.png │ ├── onep0.png │ ├── onep1.png │ ├── onep2.png │ └── onep3.png └── test6-T20,bf50 │ ├── 20dist0.png │ ├── 20dist1.png │ ├── 20dist2.png │ ├── 20dist3.png │ ├── 20dist4.png │ ├── 20onep0.png │ ├── 20onep1.png │ ├── 20onep2.png │ ├── 20onep3.png │ ├── 20onep4.png │ ├── Screenshot from 2019-05-01 19-44-20.png │ ├── dist5.png │ ├── multistep0.png │ ├── multistep1.png │ ├── multistep2.png │ ├── multistep3.png │ ├── multistep4.png │ ├── multistep5.png │ └── onep5.png ├── controller.py ├── frame_0000000.png ├── last-ord ├── Screenshot from 2019-05-04 00-31-31.png ├── dist0.png ├── dist1.png ├── dist2.png ├── dist3.png ├── multistep0.png ├── multistep1.png ├── multistep2.png ├── multistep3.png ├── onep0.png ├── onep1.png ├── onep2.png └── onep3.png ├── mgpr.py ├── pilco.py ├── q+x - t50 ├── dist0.png ├── dist1.png ├── dist2.png ├── multistep0.png ├── multistep1.png ├── multistep2.png ├── onep0.png ├── onep1.png └── onep2.png ├── reward.py ├── test.py ├── test1 ├── dist0.png ├── dist1.png ├── dist2.png ├── dist3.png ├── dist4.png ├── dist5.png ├── multistep0.png ├── multistep1.png ├── multistep2.png ├── multistep3.png ├── multistep4.png ├── multistep5.png ├── onep0.png 
├── onep1.png ├── onep2.png ├── onep3.png ├── onep4.png └── onep5.png ├── test2 ├── dist0.png ├── dist1.png ├── dist2.png ├── dist3.png ├── multistep0.png ├── multistep1.png ├── multistep2.png ├── multistep3.png ├── onep0.png ├── onep1.png ├── onep2.png └── onep3.png ├── unresponsive-paper-t25 ├── 20dist0.png ├── 20dist1.png ├── 20dist2.png ├── 20dist3.png ├── 20onep0.png ├── 20onep1.png ├── 20onep2.png ├── 20onep3.png ├── multistep0.png ├── multistep1.png ├── multistep2.png └── multistep3.png ├── unresponsive.py ├── unresponsive ├── 20dist0.png ├── 20dist1.png ├── 20dist2.png ├── 20dist3.png ├── 20onep0.png ├── 20onep1.png ├── 20onep2.png ├── 20onep3.png ├── Screenshot from 2019-05-03 21-03-56.png ├── multistep0.png ├── multistep1.png ├── multistep2.png └── multistep3.png └── unresponsive2-fai ├── dist0.png ├── dist1.png ├── dist2.png ├── dist3.png ├── dist4.png ├── dist5.png ├── multistep0.png ├── multistep1.png ├── multistep2.png ├── multistep3.png ├── multistep4.png ├── multistep5.png ├── onep0.png ├── onep1.png ├── onep2.png ├── onep3.png ├── onep4.png └── onep5.png /GP(2).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/GP(2).pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ali Younes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The 
above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-efficient-RL 2 | PILCO, DeepPILCO .. 3 | -------------------------------------------------------------------------------- /beest-for paper-t50/Screenshot from 2019-05-03 20-35-37.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/Screenshot from 2019-05-03 20-35-37.png -------------------------------------------------------------------------------- /beest-for paper-t50/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/dist0.png -------------------------------------------------------------------------------- /beest-for paper-t50/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/dist1.png -------------------------------------------------------------------------------- /beest-for 
paper-t50/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/dist2.png -------------------------------------------------------------------------------- /beest-for paper-t50/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/dist3.png -------------------------------------------------------------------------------- /beest-for paper-t50/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/multistep0.png -------------------------------------------------------------------------------- /beest-for paper-t50/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/multistep1.png -------------------------------------------------------------------------------- /beest-for paper-t50/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/multistep2.png -------------------------------------------------------------------------------- /beest-for paper-t50/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/multistep3.png -------------------------------------------------------------------------------- /beest-for 
# NOTE(review): the comment lines directly below are repository-dump residue that
# shared a physical line with the start of cartpole.py. They are kept verbatim
# (only prefixed with "# ") so that no dump content is lost by this edit.
# paper-t50/onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/onep0.png --------------------------------------------------------------------------------
# /beest-for paper-t50/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/onep1.png --------------------------------------------------------------------------------
# /beest-for paper-t50/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/onep2.png --------------------------------------------------------------------------------
# /beest-for paper-t50/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/beest-for paper-t50/onep3.png --------------------------------------------------------------------------------
# /cartpole.py: --------------------------------------------------------------------------------
"""PILCO experiment on gym's CartPole-v0.

Collects one random rollout, fits the PILCO GP dynamics model on it, then
alternates model optimisation, policy optimisation and a policy rollout for a
fixed number of trials, saving a one-step-prediction plot after every rollout.
"""
import numpy as np
import gym
import tensorflow as tf
import gpflow
from gpflow import autoflow
from gym.wrappers import Monitor

from pilco import PILCO

from controller import RbfController
from reward import ExponentialReward
import time
import matplotlib
import matplotlib.pyplot as plt

float_type = gpflow.settings.dtypes.float_type

np.random.seed(0)


## env -> rollout and experiment
class EnvWrapper():
    """Thin wrapper around gym's ``CartPole-v0``.

    The raw observation is re-encoded so that element 2 (the pole angle in
    gym's documented CartPole observation layout) is replaced by its sine and
    cosine, giving a 5-element state: ``[ob[0], ob[1], sin(ob[2]), cos(ob[2]),
    ob[-1]]``.
    """

    def __init__(self):
        self.env = gym.make('CartPole-v0')

    @staticmethod
    def _augment(ob):
        """Return the 5-element state built from a raw gym observation."""
        return np.hstack((ob[:2], np.sin(ob[2]), np.cos(ob[2]), ob[-1]))

    def step(self, action):
        """Apply ``action`` and return ``(augmented_state, done)``."""
        ob, _, done, _ = self.env.step(action)
        return self._augment(ob), done

    def reset(self):
        """Reset the environment and return the augmented initial state."""
        return self._augment(self.env.reset())

    def render(self):
        """Render one frame, then sleep 1/30 s to pace the display."""
        self.env.render()
        time.sleep(1/30)


def rollout(env, T, random=False, policy=None):
    """Run one episode of at most ``T`` steps and record the transitions.

    Args:
        env: an ``EnvWrapper`` instance.
        T: maximum number of steps; must be at least 1.
        random: if true, sample actions from the environment's action space
            instead of querying the policy.
        policy: object exposing ``compute_action``; when omitted, the
            module-level ``pilco`` object is used (the original behaviour).

    Returns:
        ``(X, Y, elapsed)`` where each row of ``X`` is ``[state, action]``,
        each row of ``Y`` is ``next_state - state``, and ``elapsed`` is the
        wall-clock duration of the rollout in seconds.

    Raises:
        ValueError: if ``T < 1`` (no transition could be recorded).
    """
    if T < 1:
        raise ValueError("rollout needs T >= 1, got %r" % (T,))
    if not random and policy is None:
        # Backward compatible: fall back to the global created by the script.
        policy = pilco
    n_actions = env.env.action_space.n
    start = time.time()
    X = []
    Y = []
    x = env.reset()
    env.render()
    for t in range(T):
        if random:
            u = env.env.action_space.sample()
        else:
            a = policy.compute_action(x[None, :])[0, :]
            # The controller output is continuous; rounding alone can yield an
            # index outside the discrete action set (e.g. -1), which gym
            # rejects. Clip to the valid action indices.
            u = int(np.clip(np.round(a[0]), 0, n_actions - 1))
        new_x, done = env.step(u)
        env.render()
        X.append(np.hstack((x, u)))
        Y.append(new_x - x)
        x = new_x
        if done:
            break
    end = time.time()
    print ("time on real robot= %.1f s"%(end-start))
    return np.stack(X), np.stack(Y), end - start


@autoflow((float_type,[None, None]), (float_type,[None, None]))
def predict_one_step_wrapper(mgpr, m, s):
    """Evaluate ``mgpr.predict_on_noisy_inputs(m, s)`` through autoflow."""
    return mgpr.predict_on_noisy_inputs(m, s)


@autoflow((float_type,[None, None]), (float_type,[None, None]), (np.int32, []))
def predict_trajectory_wrapper(pilco, m, s, horizon):
    """Evaluate ``pilco.predict(m, s, horizon)`` through autoflow."""
    return pilco.predict(m, s, horizon)


@autoflow((float_type,[None, None]), (float_type,[None, None]))
def compute_action_wrapper(pilco, m, s):
    """Evaluate ``pilco.controller.compute_action(m, s)`` through autoflow."""
    return pilco.controller.compute_action(m, s)


@autoflow((float_type, [None, None]), (float_type, [None, None]))
def reward_wrapper(reward, m, s):
    """Evaluate ``reward.compute_reward(m, s)`` through autoflow."""
    return reward.compute_reward(m, s)


def plot(pilco, X, Y, T, trial):
    """Plot one-step GP predictions against the observed targets.

    One subplot is drawn per model in ``pilco.mgpr.models``: the predicted
    mean with a +/- 2 standard deviation band, together with column ``i`` of
    ``Y``. The figure is saved as ``onep<trial>.png``, shown, then closed.

    Args:
        pilco: object whose ``mgpr.models`` each provide ``predict_y``.
        X: model inputs passed to ``predict_y``.
        Y: observed targets, one column per model.
        T: unused; kept so existing call sites keep working.
        trial: trial index used in the title and the output file name.
    """
    models = list(pilco.mgpr.models)
    # squeeze=False keeps ``axes`` two-dimensional even for a single model.
    fig, axes = plt.subplots(nrows=len(models), ncols=1, figsize=(12, 6),
                             squeeze=False)
    axes = axes[:, 0]
    axes[0].set_title('One step prediction - Trial#%d' % trial)
    axes[-1].set_xlabel('t')
    axes[len(models) // 2].set_ylabel('x')
    for i, m in enumerate(models):
        y_pred_test, var_pred_test = m.predict_y(X)
        steps = range(len(y_pred_test))
        mean = y_pred_test[:, 0]
        band = 2 * np.sqrt(var_pred_test[:, 0])
        axes[i].plot(steps, mean, steps, Y[:, i])
        axes[i].fill_between(steps, mean - band, mean + band, alpha=0.3)

    plt.savefig("onep%d.png" % trial)
    plt.show()
    # Release the figure; otherwise one figure per trial stays alive.
    plt.close(fig)


with tf.Session() as sess:
    p_start = time.time()
    env = EnvWrapper()
    try:
        T = 50
        num_basis_functions = 10
        max_action = 1.0
        time_on_real_robot = 0
        X, Y, t = rollout(env, T, random=True)
        time_on_real_robot += t
        state_dim = Y.shape[1]
        control_dim = X.shape[1] - Y.shape[1]
        controller = RbfController(state_dim, control_dim, num_basis_functions, max_action)
        reward = ExponentialReward(state_dim, t=np.array([0.0, 0.0, 0.0, 1.0, 0.0]))
        pilco = PILCO(X, Y, controller=controller, reward=reward)
        plot(pilco, X, Y, T, 0)
        n = 7
        t_model = 0
        t_policy = 0
        for i in range(1, n):
            t1 = time.time()
            pilco.optimize_models()
            t2 = time.time()
            t_model += t2 - t1
            print("model optimization done!")
            pilco.optimize_policy()
            t3 = time.time()
            t_policy += t3 - t2
            print("policy optimization done!")
            X_, Y_, t = rollout(env, T, policy=pilco)
            time_on_real_robot += t
            plot(pilco, X_, Y_, T, i)
            # Cap the training set at 2*T rows, keeping the MOST RECENT data.
            # NOTE(review): the original sliced [:2*T], which keeps the oldest
            # rows and drops every new rollout once the buffer is full --
            # confirm that keeping the newest rows is the intended behaviour.
            X = np.vstack((X, X_[:T, :]))[-2 * T:]
            Y = np.vstack((Y, Y_[:T, :]))[-2 * T:]
            pilco.mgpr.set_XY(X, Y)
        print("t_robot= %.2f s" %time_on_real_robot)
        print("t_model= %.2f s" %t_model)
        print("t_policy= %.2f s" %t_policy)
        print("program running time = %d s" %(time.time()-p_start))
    finally:
        # Always release the gym environment (and its render window).
        env.env.close()
# NOTE(review): the comment lines below are repository-dump residue that shared
# a physical line with the end of cartpole.py; kept verbatim (prefixed with "# ").
# -------------------------------------------------------------------------------- /cartpole/cartpole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/cartpole/cartpole.png --------------------------------------------------------------------------------
# /cartpole/onep0.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/cartpole/onep0.png -------------------------------------------------------------------------------- /cartpole/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/cartpole/onep1.png -------------------------------------------------------------------------------- /cartpole/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/cartpole/onep2.png -------------------------------------------------------------------------------- /cartpole/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/cartpole/onep3.png -------------------------------------------------------------------------------- /cartpole/onep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/cartpole/onep4.png -------------------------------------------------------------------------------- /cartpole/onep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/cartpole/onep5.png -------------------------------------------------------------------------------- /cartpole/onep6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/cartpole/onep6.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/dist0.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/dist1.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/dist2.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/dist3.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/dist4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/dist4.png -------------------------------------------------------------------------------- /control ef/3unresponisve- 
t20,n8/dist5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/dist5.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/dist6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/dist6.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/dist7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/dist7.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/multistep0.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/multistep1.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control 
ef/3unresponisve- t20,n8/multistep2.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/multistep3.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/multistep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/multistep4.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/multistep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/multistep5.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/multistep6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/multistep6.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/multistep7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/multistep7.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/onep0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/onep0.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/onep1.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/onep2.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/onep3.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/onep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/onep4.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/onep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/onep5.png 
-------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/onep6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/onep6.png -------------------------------------------------------------------------------- /control ef/3unresponisve- t20,n8/onep7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponisve- t20,n8/onep7.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/Screenshot from 2019-05-03 13-47-27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/Screenshot from 2019-05-03 13-47-27.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/dist0.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/dist1.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/dist2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/dist2.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/dist3.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/dist4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/dist4.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/dist5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/dist5.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/dist6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/dist6.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/dist7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/dist7.png 
-------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/multistep0.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/multistep1.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/multistep2.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/multistep3.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/multistep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/multistep4.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/multistep5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/multistep5.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/multistep6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/multistep6.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/multistep7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/multistep7.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/onep0.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/onep1.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/onep2.png 
-------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/onep3.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/onep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/onep4.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/onep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/onep5.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/onep6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/onep6.png -------------------------------------------------------------------------------- /control ef/3unresponsive-t50,n8/onep7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/3unresponsive-t50,n8/onep7.png -------------------------------------------------------------------------------- /control ef/test1/dist0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test1/dist0.png -------------------------------------------------------------------------------- /control ef/test1/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test1/dist1.png -------------------------------------------------------------------------------- /control ef/test1/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test1/dist2.png -------------------------------------------------------------------------------- /control ef/test1/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test1/dist3.png -------------------------------------------------------------------------------- /control ef/test1/dist4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test1/dist4.png -------------------------------------------------------------------------------- /control ef/test2/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/dist0.png -------------------------------------------------------------------------------- /control ef/test2/dist1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/dist1.png -------------------------------------------------------------------------------- /control ef/test2/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/dist2.png -------------------------------------------------------------------------------- /control ef/test2/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/dist3.png -------------------------------------------------------------------------------- /control ef/test2/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/multistep0.png -------------------------------------------------------------------------------- /control ef/test2/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/multistep1.png -------------------------------------------------------------------------------- /control ef/test2/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/multistep2.png -------------------------------------------------------------------------------- /control ef/test2/multistep3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/multistep3.png -------------------------------------------------------------------------------- /control ef/test2/onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/onep0.png -------------------------------------------------------------------------------- /control ef/test2/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/onep1.png -------------------------------------------------------------------------------- /control ef/test2/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/onep2.png -------------------------------------------------------------------------------- /control ef/test2/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test2/onep3.png -------------------------------------------------------------------------------- /control ef/test3-fail/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/dist0.png -------------------------------------------------------------------------------- /control ef/test3-fail/dist1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/dist1.png -------------------------------------------------------------------------------- /control ef/test3-fail/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/dist2.png -------------------------------------------------------------------------------- /control ef/test3-fail/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/dist3.png -------------------------------------------------------------------------------- /control ef/test3-fail/dist4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/dist4.png -------------------------------------------------------------------------------- /control ef/test3-fail/dist5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/dist5.png -------------------------------------------------------------------------------- /control ef/test3-fail/dist6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/dist6.png -------------------------------------------------------------------------------- /control ef/test3-fail/dist7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/dist7.png -------------------------------------------------------------------------------- /control ef/test3-fail/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/multistep0.png -------------------------------------------------------------------------------- /control ef/test3-fail/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/multistep1.png -------------------------------------------------------------------------------- /control ef/test3-fail/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/multistep2.png -------------------------------------------------------------------------------- /control ef/test3-fail/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/multistep3.png -------------------------------------------------------------------------------- /control ef/test3-fail/multistep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/multistep4.png -------------------------------------------------------------------------------- /control ef/test3-fail/multistep5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/multistep5.png -------------------------------------------------------------------------------- /control ef/test3-fail/multistep6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/multistep6.png -------------------------------------------------------------------------------- /control ef/test3-fail/onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/onep0.png -------------------------------------------------------------------------------- /control ef/test3-fail/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/onep1.png -------------------------------------------------------------------------------- /control ef/test3-fail/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/onep2.png -------------------------------------------------------------------------------- /control ef/test3-fail/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/onep3.png -------------------------------------------------------------------------------- /control ef/test3-fail/onep4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/onep4.png -------------------------------------------------------------------------------- /control ef/test3-fail/onep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/onep5.png -------------------------------------------------------------------------------- /control ef/test3-fail/onep6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/onep6.png -------------------------------------------------------------------------------- /control ef/test3-fail/onep7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test3-fail/onep7.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/dist0.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/dist1.png 
-------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/dist2.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/dist3.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/dist4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/dist4.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/dist5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/dist5.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/dist6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/dist6.png -------------------------------------------------------------------------------- /control 
ef/test4-T50,bf50, best - without time/dist7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/dist7.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/multistep0.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/multistep1.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/multistep2.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/multistep3.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/multistep4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/multistep4.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/multistep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/multistep5.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/multistep6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/multistep6.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/multistep7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/multistep7.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/onep0.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/onep1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/onep1.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/onep2.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/onep3.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/onep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/onep4.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/onep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/onep5.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/onep6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/onep6.png -------------------------------------------------------------------------------- /control ef/test4-T50,bf50, best - without time/onep7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test4-T50,bf50, best - without time/onep7.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/Screenshot from 2019-05-01 19-31-45.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/Screenshot from 2019-05-01 19-31-45.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/dist0.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/dist1.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/dist2.png 
-------------------------------------------------------------------------------- /control ef/test5-T50,bf50/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/dist3.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/multistep0.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/multistep1.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/multistep2.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/multistep3.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/onep0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/onep0.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/onep1.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/onep2.png -------------------------------------------------------------------------------- /control ef/test5-T50,bf50/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test5-T50,bf50/onep3.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20dist0.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20dist1.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20dist2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20dist2.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20dist3.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20dist4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20dist4.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20onep0.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20onep1.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20onep2.png 
-------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20onep3.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/20onep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/20onep4.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/Screenshot from 2019-05-01 19-44-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/Screenshot from 2019-05-01 19-44-20.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/dist5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/dist5.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/multistep0.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/multistep1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/multistep1.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/multistep2.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/multistep3.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/multistep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/multistep4.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/multistep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/multistep5.png -------------------------------------------------------------------------------- /control ef/test6-T20,bf50/onep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/control ef/test6-T20,bf50/onep5.png -------------------------------------------------------------------------------- /controller.py: 
# ------------------------------------------------------------------------------
# controller.py
# ------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import gpflow

from mgpr import MGPR
from gpflow import settings
float_type = settings.dtypes.float_type


def squash_sin(m, s, max_action=None):
    '''
    Bound a Gaussian control signal by passing its mean and variance
    through a sine, as in gSin.m. The output is in [-max_action, max_action].
    IN: mean (m) and variance (s) of the control input, max_action
    OUT: mean (M), variance (S) and input-output covariance (C) of the
         squashed control input
    '''
    k = tf.shape(m)[1]
    # Without an explicit bound the controls are squashed into [-1, 1].
    bound = tf.ones((1, k), dtype=float_type)
    if max_action is not None:
        bound = max_action * bound

    var = tf.diag_part(s)
    damping = tf.exp(-var / 2)

    M = bound * damping * tf.sin(m)

    lq = -(var[:, None] + var[None, :]) / 2
    q = tf.exp(lq)
    m_t = tf.transpose(m)
    diff_term = (tf.exp(lq + s) - q) * tf.cos(m_t - m)
    sum_term = (tf.exp(lq - s) - q) * tf.cos(m_t + m)
    S = bound * tf.transpose(bound) * (diff_term - sum_term) / 2

    C = bound * tf.diag(damping * tf.cos(m))
    return M, S, tf.reshape(C, shape=[k, k])


class FakeGPR(gpflow.Parameterized):
    '''
    Bare parameter container holding X, Y, a kernel and a Gaussian likelihood.
    It is what RbfController.create_models stores in place of a GPR model.
    '''
    def __init__(self, X, Y, kernel):
        super(FakeGPR, self).__init__()
        self.X = gpflow.Param(X)
        self.Y = gpflow.Param(Y)
        self.kern = kernel
        self.likelihood = gpflow.likelihoods.Gaussian()


class RbfController(MGPR):
    '''
    An RBF Controller implemented as a deterministic GP
    See Deisenroth et al 2015: Gaussian Processes for Data-Efficient Learning in Robotics and Control
    Section 5.3.2.
    '''
    def __init__(self, state_dim, control_dim, num_basis_functions, max_action=None):
        # Draw order matters for reproducibility under a fixed numpy seed:
        # basis-function centres first, then the (scaled) targets.
        centres = np.random.randn(num_basis_functions, state_dim)
        targets = 0.1 * np.random.randn(num_basis_functions, control_dim)
        super(RbfController, self).__init__(centres, targets)
        for gp in self.models:
            # Kernel variance is pinned to 1 and excluded from training.
            gp.kern.variance = 1.0
            gp.kern.variance.trainable = False
        self.max_action = max_action

    def create_models(self, X, Y):
        '''
        Build one FakeGPR per output column of Y, each with its own ARD RBF kernel.
        '''
        self.models = gpflow.params.ParamList([])
        n_inputs = X.shape[1]
        for col in range(self.num_outputs):
            rbf = gpflow.kernels.RBF(input_dim=n_inputs, ARD=True)
            self.models.append(FakeGPR(X, Y[:, col:col + 1], rbf))

    def compute_action(self, m, s, squash=True):
        '''
        RBF Controller.
        IN: mean (m) and variance (s) of the state
        OUT: mean (M) and variance (S) of the action, plus the third value
             returned by predict_given_factorizations (multiplied by the
             squashing covariance when squash is True)
        '''
        iK, beta = self.calculate_factorizations()
        # The inverse kernel matrix is multiplied by zero before being passed on.
        M, S, V = self.predict_given_factorizations(m, s, 0.0 * iK, beta)
        S = S - tf.diag(self.variance - 1e-6)
        if not squash:
            return M, S, V
        M, S, C_squash = squash_sin(M, S, self.max_action)
        return M, S, V @ C_squash


# ------------------------------------------------------------------------------
# Asset-listing entries that share these source lines (kept verbatim as comments)
# ------------------------------------------------------------------------------
# /frame_0000000.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/frame_0000000.png
# /last-ord/Screenshot from 2019-05-04 00-31-31.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/Screenshot from 2019-05-04 00-31-31.png
# /last-ord/dist0.png:   (its URL is on the following listing line)
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/dist0.png -------------------------------------------------------------------------------- /last-ord/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/dist1.png -------------------------------------------------------------------------------- /last-ord/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/dist2.png -------------------------------------------------------------------------------- /last-ord/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/dist3.png -------------------------------------------------------------------------------- /last-ord/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/multistep0.png -------------------------------------------------------------------------------- /last-ord/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/multistep1.png -------------------------------------------------------------------------------- /last-ord/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/multistep2.png 
# ------------------------------------------------------------------------------
# Asset-listing entries that share these source lines (kept verbatim as comments)
# ------------------------------------------------------------------------------
# /last-ord/multistep3.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/multistep3.png
# /last-ord/onep0.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/onep0.png
# /last-ord/onep1.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/onep1.png
# /last-ord/onep2.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/onep2.png
# /last-ord/onep3.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/last-ord/onep3.png

# ------------------------------------------------------------------------------
# mgpr.py
# ------------------------------------------------------------------------------
import tensorflow as tf
import gpflow
import numpy as np
float_type = gpflow.settings.dtypes.float_type


class MGPR(gpflow.Parameterized):
    """
    A bank of independent single-output GP regressors sharing the same inputs,
    one per column of Y, with moment-matching prediction at noisy inputs.
    """

    def __init__(self, X, Y, name=None):
        """
        X: array whose rows are inputs; Y: array whose columns are the
        individual regression targets. Sizes are read from X.shape / Y.shape.
        """
        super(MGPR, self).__init__(name)

        self.num_outputs = Y.shape[1]
        self.num_dims = X.shape[1]
        self.num_datapoints = X.shape[0]

        self.create_models(X, Y)
        self.optimizers = []

    def create_models(self, X, Y):
        """Create and compile one GPR (ARD RBF kernel with Gamma priors) per output column."""
        self.models = []
        for i in range(self.num_outputs):
            kern = gpflow.kernels.RBF(input_dim=X.shape[1], ARD=True)
            kern.lengthscales.prior = gpflow.priors.Gamma(1, 10)  # priors have to be included
            kern.variance.prior = gpflow.priors.Gamma(1.5, 2)     # before the model gets compiled
            self.models.append(gpflow.models.GPR(X, Y[:, i:i+1], kern))
            self.models[i].clear()
            self.models[i].compile()

    def set_XY(self, X, Y):
        """Replace the training data of every model; model i receives column i of Y."""
        for i in range(len(self.models)):
            self.models[i].X = X
            self.models[i].Y = Y[:, i:i+1]

    def optimize(self, restarts=0):
        """
        Fit the hyper-parameters of every model with L-BFGS-B.

        On the first call an optimizer is created (and run once) per model.
        Afterwards each model is re-optimized `restarts` times from its
        current parameters and the parameters with the highest log-likelihood
        seen are assigned back.
        """
        if len(self.optimizers) == 0:  # This is the first call to optimize();
            for model in self.models:
                # Create an gpflow.train.ScipyOptimizer object for every model embedded in mgpr
                optimizer = gpflow.train.ScipyOptimizer(method='L-BFGS-B')
                optimizer.minimize(model)
                self.optimizers.append(optimizer)
            # NOTE(review): nesting level of this decrement was reconstructed
            # from a flattened listing (once per first call, not once per
            # model) - confirm against the original file.
            restarts -= 1

        for model, optimizer in zip(self.models, self.optimizers):
            session = optimizer._model.enquire_session(None)
            best_parameters = model.read_values(session=session)
            best_likelihood = model.compute_log_likelihood()
            for restart in range(restarts):
                #randomize(model)
                optimizer._optimizer.minimize(session=session,
                            feed_dict=optimizer._gen_feed_dict(optimizer._model, None),
                            step_callback=None)
                likelihood = model.compute_log_likelihood()
                if likelihood > best_likelihood:
                    best_parameters = model.read_values(session=session)
                    best_likelihood = likelihood
            model.assign(best_parameters)

    def predict_on_noisy_inputs(self, m, s):
        """Moment-matching prediction for an input with mean m and covariance s."""
        iK, beta = self.calculate_factorizations()
        return self.predict_given_factorizations(m, s, iK, beta)

    def calculate_factorizations(self):
        """
        Return (iK, beta) for all models at once: iK is the inverse of
        (K + noise * I) obtained through a Cholesky factor, beta is that
        inverse applied to the targets.
        """
        K = self.K(self.X)
        batched_eye = tf.eye(tf.shape(self.X)[0], batch_shape=[self.num_outputs], dtype=float_type)
        L = tf.cholesky(K + self.noise[:, None, None]*batched_eye)
        iK = tf.cholesky_solve(L, batched_eye)
        Y_ = tf.transpose(self.Y)[:, :, None]
        # Why do we transpose Y? Maybe we need to change the definition of self.Y() or beta?
        beta = tf.cholesky_solve(L, Y_)[:, :, 0]
        return iK, beta

    def predict_given_factorizations(self, m, s, iK, beta):
        """
        Approximate GP regression at noisy inputs via moment matching
        IN: mean (m) (row vector) and (s) variance of the state
        OUT: mean (M) (row vector), variance (S) of the action
             and inv(s)*input-output covariance
        """

        s = tf.tile(s[None, None, :, :], [self.num_outputs, self.num_outputs, 1, 1])
        inp = tf.tile(self.centralized_input(m)[None, :, :], [self.num_outputs, 1, 1])

        # Calculate M and V: mean and inv(s) times input-output covariance
        iL = tf.matrix_diag(1/self.lengthscales)
        iN = inp @ iL
        B = iL @ s[0, ...] @ iL + tf.eye(self.num_dims, dtype=float_type)

        # Redefine iN as in^T and t --> t^T
        # B is symmetric so its the same
        t = tf.linalg.transpose(
                tf.matrix_solve(B, tf.linalg.transpose(iN), adjoint=True),
            )

        lb = tf.exp(-tf.reduce_sum(iN * t, -1)/2) * beta
        tiL = t @ iL
        c = self.variance / tf.sqrt(tf.linalg.det(B))

        M = (tf.reduce_sum(lb, -1) * c)[:, None]
        V = tf.matmul(tiL, lb[:, :, None], adjoint_a=True)[..., 0] * c[:, None]

        # Calculate S: Predictive Covariance
        R = s @ tf.matrix_diag(
                1/tf.square(self.lengthscales[None, :, :]) +
                1/tf.square(self.lengthscales[:, None, :])
            ) + tf.eye(self.num_dims, dtype=float_type)

        X = inp[None, :, :, :]/tf.square(self.lengthscales[:, None, None, :])
        X2 = -inp[:, None, :, :]/tf.square(self.lengthscales[None, :, None, :])
        Q = tf.matrix_solve(R, s)/2
        Xs = tf.reduce_sum(X @ Q * X, -1)
        X2s = tf.reduce_sum(X2 @ Q * X2, -1)
        maha = -2 * tf.matmul(X @ Q, X2, adjoint_b=True) + \
            Xs[:, :, :, None] + X2s[:, :, None, :]
        #
        k = tf.log(self.variance)[:, None] - \
            tf.reduce_sum(tf.square(iN), -1)/2
        L = tf.exp(k[:, None, :, None] + k[None, :, None, :] + maha)
        S = (tf.tile(beta[:, None, None, :], [1, self.num_outputs, 1, 1])
                @ L @
                tf.tile(beta[None, :, :, None], [self.num_outputs, 1, 1, 1])
            )[:, :, 0, 0]

        diagL = tf.transpose(tf.linalg.diag_part(tf.transpose(L)))
        S = S - tf.diag(tf.reduce_sum(tf.multiply(iK, diagL), [1, 2]))
        S = S / tf.sqrt(tf.linalg.det(R))
        S = S + tf.diag(self.variance)
        S = S - M @ tf.transpose(M)

        return tf.transpose(M), S, tf.transpose(V)

    def centralized_input(self, m):
        """Training inputs shifted by the query mean m."""
        return self.X - m

    def K(self, X1, X2=None):
        """Kernel matrices of all models, stacked along a new leading axis."""
        return tf.stack(
            [model.kern.K(X1, X2) for model in self.models]
        )

    @property
    def Y(self):
        """Targets of all models concatenated column-wise."""
        return tf.concat(
            [model.Y.parameter_tensor for model in self.models],
            axis = 1
        )

    @property
    def X(self):
        """Inputs of the first model (set_XY gives every model the same X)."""
        return self.models[0].X.parameter_tensor

    @property
    def lengthscales(self):
        """Kernel lengthscales of all models, stacked."""
        return tf.stack(
            [model.kern.lengthscales.constrained_tensor for model in self.models]
        )

    @property
    def variance(self):
        """Kernel variances of all models, stacked."""
        return tf.stack(
            [model.kern.variance.constrained_tensor for model in self.models]
        )

    @property
    def noise(self):
        """Likelihood (noise) variances of all models, stacked."""
        return tf.stack(
            [model.likelihood.variance.constrained_tensor for model in self.models]
        )


# ------------------------------------------------------------------------------
# pilco.py
# ------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import gpflow
import pandas as pd
import time

from mgpr import MGPR
from controller import RbfController
from reward import ExponentialReward

float_type = gpflow.settings.dtypes.float_type


class PILCO(gpflow.models.Model):
    """
    Ties together the dynamics model (MGPR), a controller and a reward, and
    exposes the predicted cumulative reward over `horizon` steps as the
    model's likelihood so the controller can be tuned by the optimizer.
    """

    def __init__(self, X, Y, num_induced_points=None, horizon=100, controller=None,
                 reward=None, m_init=None, S_init=None, name=None):
        """
        X: rows of concatenated (state, action); Y: rows of state targets.
        controller / reward: objects providing compute_action / compute_reward;
            they are stored as given (None is not replaced by a default).
        m_init, S_init: mean and covariance of the initial state used for
            trajectory prediction. When omitted, the first state in X and
            0.1 * identity are used.
        num_induced_points is accepted but not used here.
        """
        super(PILCO, self).__init__(name)
        self.mgpr = MGPR(X, Y)
        self.state_dim = Y.shape[1]
        self.control_dim = X.shape[1] - Y.shape[1]
        self.horizon = horizon
        self.controller = controller
        self.reward = reward
        # Honour caller-supplied initial-state moments; previously these
        # arguments were accepted and then silently overwritten.
        if m_init is None:
            m_init = X[0:1, 0:self.state_dim]
        if S_init is None:
            S_init = np.diag(np.ones(self.state_dim) * 0.1)
        self.m_init = m_init
        self.S_init = S_init
        self.optimizer = None

    @gpflow.name_scope('likelihood')
    def _build_likelihood(self):
        # This is for tuning controller's parameters
        reward = self.predict(self.m_init, self.S_init, self.horizon)[2]
        return reward

    def optimize_models(self, maxiter=200, restarts=1):
        '''
        Optimize GP models and print the learned hyper-parameters.
        (maxiter is accepted but not forwarded.)
        '''
        self.mgpr.optimize(restarts=restarts)
        # Print the resulting model parameters
        lengthscales = {}; variances = {}; noises = {};
        for i, model in enumerate(self.mgpr.models):
            lengthscales['GP' + str(i)] = model.kern.lengthscales.value
            variances['GP' + str(i)] = np.array([model.kern.variance.value])
            noises['GP' + str(i)] = np.array([model.likelihood.variance.value])
        print('-----Learned models------')
        # Fully-qualified key: the abbreviated 'precision' is rejected by
        # newer pandas releases, 'display.precision' is accepted by all.
        pd.set_option('display.precision', 3)
        print('---Lengthscales---')
        print(pd.DataFrame(data=lengthscales))
        print('---Variances---')
        print(pd.DataFrame(data=variances))
        print('---Noises---')
        print(pd.DataFrame(data=noises))

    def optimize_policy(self, maxiter=30, restarts=0):
        '''
        Optimize controller's parameters.

        The first call creates the L-BFGS-B optimizer and runs it with
        `maxiter`; every call then runs the underlying optimizer once more
        and assigns the resulting values. `restarts` is accepted but unused.
        '''
        start = time.time()
        if not self.optimizer:
            self.optimizer = gpflow.train.ScipyOptimizer(method="L-BFGS-B")
            start = time.time()
            self.optimizer.minimize(self, maxiter=maxiter)
            end = time.time()
            print("Controller's optimization: done in %.1f seconds with reward=%.3f." % (end - start, self.compute_reward()))
        # NOTE(review): nesting reconstructed from a flattened listing; the
        # statements below are taken to run on every call - confirm.
        session = self.optimizer._model.enquire_session(None)
        start = time.time()
        self.optimizer._optimizer.minimize(session=session,
                    feed_dict=self.optimizer._gen_feed_dict(self.optimizer._model, None),
                    step_callback=None)
        end = time.time()
        print("Controller's optimization: done in %.1f seconds with reward=%.3f." % (end - start, self.compute_reward()))
        best_parameters = self.read_values(session=session)
        self.assign(best_parameters)

    @gpflow.autoflow((float_type,[None, None]))
    def compute_action(self, x_m):
        """Mean action for state mean x_m with an all-zero state covariance."""
        return self.controller.compute_action(x_m, tf.zeros([self.state_dim, self.state_dim], float_type))[0]

    def predict(self, m_x, s_x, n):
        """
        Propagate the state distribution (m_x, s_x) for n steps and sum the
        reward mean evaluated at the state entering each step.
        Returns the final mean, final covariance and accumulated reward.
        """
        loop_vars = [
            tf.constant(0, tf.int32),
            m_x,
            s_x,
            tf.constant([[0]], float_type)
        ]

        _, m_x, s_x, reward = tf.while_loop(
            # Termination condition
            lambda j, m_x, s_x, reward: j < n,
            # Body function
            lambda j, m_x, s_x, reward: (
                j + 1,
                *self.propagate(m_x, s_x),
                tf.add(reward, self.reward.compute_reward(m_x, s_x)[0])
            ), loop_vars
        )

        return m_x, s_x, reward

    def propagate(self, m_x, s_x):
        """One step: state distribution -> action -> predicted next state distribution."""
        m_u, s_u, c_xu = self.controller.compute_action(m_x, s_x)

        # Joint (state, action) mean and covariance fed to the dynamics model.
        m = tf.concat([m_x, m_u], axis=1)
        s1 = tf.concat([s_x, s_x@c_xu], axis=1)
        s2 = tf.concat([tf.transpose(s_x@c_xu), s_u], axis=1)
        s = tf.concat([s1, s2], axis=0)

        # The model output is added to the current state (M_x = M_dx + m_x).
        M_dx, S_dx, C_dx = self.mgpr.predict_on_noisy_inputs(m, s)
        M_x = M_dx + m_x
        S_x = S_dx + s_x + s1@C_dx + tf.matmul(C_dx, s1, transpose_a=True, transpose_b=True)

        # While-loop requires the shapes of the outputs to be fixed
        M_x.set_shape([1, self.state_dim]); S_x.set_shape([self.state_dim, self.state_dim])
        return M_x, S_x

    @gpflow.autoflow()
    def compute_reward(self):
        """Evaluate the predicted cumulative reward for the current parameters."""
        return self._build_likelihood()


# ------------------------------------------------------------------------------
# Asset-listing entries that share these source lines (kept verbatim as comments)
# ------------------------------------------------------------------------------
# /q+x - t50/dist0.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/dist0.png
# (the next entry's path, "/q+x - t50/dist1.png:", begins here and continues on the following listing line)
t50/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/dist1.png -------------------------------------------------------------------------------- /q+x - t50/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/dist2.png -------------------------------------------------------------------------------- /q+x - t50/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/multistep0.png -------------------------------------------------------------------------------- /q+x - t50/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/multistep1.png -------------------------------------------------------------------------------- /q+x - t50/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/multistep2.png -------------------------------------------------------------------------------- /q+x - t50/onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/onep0.png -------------------------------------------------------------------------------- /q+x - t50/onep1.png: -------------------------------------------------------------------------------- 
# ------------------------------------------------------------------------------
# Asset-listing entries that share these source lines (kept verbatim as comments)
# ------------------------------------------------------------------------------
# (URL of "/q+x - t50/onep1.png", whose header is on the preceding listing line:)
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/onep1.png
# /q+x - t50/onep2.png:
#   https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/q+x - t50/onep2.png

# ------------------------------------------------------------------------------
# reward.py
# ------------------------------------------------------------------------------
import abc
import tensorflow as tf
from gpflow import Parameterized, Param, params_as_tensors, settings
import numpy as np

float_type = settings.dtypes.float_type


class Reward(Parameterized):
    """Base class for rewards; subclasses implement compute_reward(m, s)."""

    def __init__(self):
        Parameterized.__init__(self)

    @abc.abstractmethod
    def compute_reward(self, m, s):
        raise NotImplementedError


class ExponentialReward(Reward):
    """
    Exponential reward around a target `t` with weight matrix `W`, evaluated
    in expectation over a Gaussian state. Only the first `state_dim`
    components of the state passed to compute_reward are used.
    """

    def __init__(self, state_dim, W=None, t=None):
        """
        state_dim: number of leading state components the reward looks at.
        W: weight matrix, reshaped to (state_dim, state_dim); identity if None.
        t: target, reshaped to (1, state_dim); zeros if None.
        Both are stored as non-trainable parameters.
        """
        Reward.__init__(self)
        self.state_dim = state_dim
        if W is not None:
            self.W = Param(np.reshape(W, (state_dim, state_dim)), trainable=False)
        else:
            self.W = Param(np.eye(state_dim), trainable=False)
        if t is not None:
            self.t = Param(np.reshape(t, (1, state_dim)), trainable=False)
        else:
            self.t = Param(np.zeros((1, state_dim)), trainable=False)

    @params_as_tensors
    def compute_reward(self, m, s):
        '''
        Reward function, calculating mean and variance of rewards, given
        mean and variance of state distribution, along with the target State
        and a weight matrix.
        Input m : [1, k]
        Input s : [k, k]

        Output M : [1, 1]
        Output S : [1, 1]
        '''
        # Restrict to the leading components the reward is defined on. This
        # was a hard-coded 3 ("for robot arm"); using state_dim is identical
        # for state_dim == 3 and keeps W / t / identity sizes consistent
        # for any other value.
        d = self.state_dim
        m = m[:, :d]
        s = s[:d, :d]

        SW = s @ self.W

        iSpW = tf.transpose(
                tf.matrix_solve( (tf.eye(self.state_dim, dtype=float_type) + SW),
                tf.transpose(self.W), adjoint=True))

        muR = tf.exp(-(m-self.t) @ iSpW @ tf.transpose(m-self.t)/2) / \
                tf.sqrt( tf.linalg.det(tf.eye(self.state_dim, dtype=float_type) + SW) )

        i2SpW = tf.transpose(
                tf.matrix_solve( (tf.eye(self.state_dim, dtype=float_type) + 2*SW),
                tf.transpose(self.W), adjoint=True))

        r2 = tf.exp(-(m-self.t) @ i2SpW @ tf.transpose(m-self.t)) / \
                tf.sqrt( tf.linalg.det(tf.eye(self.state_dim, dtype=float_type) + 2*SW) )

        sR = r2 - muR @ muR
        muR.set_shape([1, 1])
        sR.set_shape([1, 1])
        return muR, sR


# ------------------------------------------------------------------------------
# test.py
# ------------------------------------------------------------------------------
import numpy as np
import gym
import tensorflow as tf
import gpflow
from gpflow import autoflow
from gym.wrappers import Monitor

from pilco import PILCO

from controller import RbfController
from reward import ExponentialReward
import time
import matplotlib
import matplotlib.pyplot as plt

float_type = gpflow.settings.dtypes.float_type

np.random.seed(0)

## env -> rollout and experiment
class EnvWrapper():
    """
    Wraps gym's 'FetchReach-v1' so that actions are joint-angle increments.
    Observations are the achieved goal position followed by the joint vector.
    """

    # Joint names in the order used by the joint vector `q`.
    _JOINT_NAMES = (
        'robot0:shoulder_pan_joint',
        'robot0:shoulder_lift_joint',
        'robot0:upperarm_roll_joint',
        'robot0:elbow_flex_joint',
        'robot0:forearm_roll_joint',
        'robot0:wrist_flex_joint',
        'robot0:wrist_roll_joint',
    )
    # Joint configuration applied by reset().
    _HOME_Q = (0.0, -1.0, 0.0, 2.0, 0.0, 0.5, 0.0)

    def __init__(self,target):
        self.env = gym.make('FetchReach-v1').env
        self.env.goal = target
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.target=target
        self.distance_threshold=0.01
        # Stored as an array so that joint updates are element-wise.
        self.q = np.array(self._HOME_Q, dtype=float)

    def _set_joints(self):
        # Write the current joint vector into the simulator, joint by joint.
        for name, value in zip(self._JOINT_NAMES, self.q):
            self.env.sim.data.set_joint_qpos(name, value)

    def _observe(self):
        # Achieved goal position followed by the joint vector.
        ob = self.env._get_obs()
        return np.concatenate((ob['achieved_goal'], self.q), axis=0)

    def step(self, action):
        """Add `action` to the joint angles, apply them and return the new observation."""
        # `list += ndarray` extends the list instead of adding element-wise,
        # which left the joints unchanged and grew the observation on every
        # step; use explicit array arithmetic instead.
        self.q = np.asarray(self.q, dtype=float) + np.asarray(action, dtype=float)
        self._set_joints()
        self.env.sim.forward()
        time.sleep(1 / 30)
        return self._observe()

    def reset(self):
        """Return the arm to the home joint configuration and re-apply the target."""
        self.q = np.array(self._HOME_Q, dtype=float)
        self._set_joints()
        self.env.goal = self.target
        self.env.sim.forward()
        time.sleep(1 / 30)
        return self._observe()

    def render(self):
        self.env.render()
        time.sleep(1/30)



i=0
def rollout(env,T, random=False,trial=0):
    """
    Run up to T steps in `env`, either with random joint increments or with
    the module-level `pilco` policy, stopping early once the first three
    state components are within 0.05 of env.target.

    Plots and saves the distance-to-target curve, and returns
    (X, Y, elapsed_seconds) where rows of X are (state, action) and rows of
    Y are the state differences new_x - x.
    """
    start=time.time()
    X = []
    Y = []
    x=env.reset()
    tt=[]
    env.render()
    rewards=[]  # holds the distance to the target at each step
    for t in range(T):
        if random:
            u = np.random.rand(7)*0.1-0.05
        else:
            u = pilco.compute_action(x[None, :])[0, :]
        new_x = env.step(u)
        tt.append(t)
        distance=np.linalg.norm(new_x[:3]-env.target)
        rewards.append(distance)
        env.render()
        X.append(np.hstack((x, u)))
        Y.append(new_x-x)
        x=new_x
        if np.linalg.norm(new_x[:3]-env.target) <0.05:
            break
    plt.plot(tt, rewards)
    plt.title("distance to goal - Trial %d" %trial)
    plt.xlabel("t")
    plt.ylabel("d")
    plt.savefig("dist%d.png"%trial)
    plt.show()
    end=time.time()
    print ("time on real robot= %.1f s"%(end-start))
    return np.stack(X),np.stack(Y),end-start

@autoflow((float_type,[None, None]), (float_type,[None, None]))
def predict_one_step_wrapper(mgpr, m, s):
    return mgpr.predict_on_noisy_inputs(m, s)


@autoflow((float_type,[None, None]), (float_type,[None, None]), (np.int32, []))
def predict_trajectory_wrapper(pilco, m, s, horizon):
    return pilco.predict(m, s, horizon)


@autoflow((float_type,[None, None]), (float_type,[None, None]))
def compute_action_wrapper(pilco, m, s):
    return pilco.controller.compute_action(m, s)


@autoflow((float_type, [None, None]), (float_type, [None, None]))
def reward_wrapper(reward, m, s):
    return reward.compute_reward(m, s)

def plot(pilco,X,Y,T,trial):
    """
    Save and show two figures for the first three state components:
    one-step model predictions against Y, and multi-step predictions
    against the recorded states in X. Reads the module-level `state_dim`.
    """
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(12, 6))
    axes[0].set_title('One step prediction - Trial#%d' % trial)
    axes[2].set_xlabel('t')
    axes[1].set_ylabel('x')
    for i, m in enumerate(pilco.mgpr.models):
        y_pred_test, var_pred_test = m.predict_y(X)
        axes[i].plot(range(len(y_pred_test)), y_pred_test, Y[:, i])
        axes[i].fill_between(range(len(y_pred_test)),
                             y_pred_test[:, 0] - 2 * np.sqrt(var_pred_test[:, 0]),
                             y_pred_test[:, 0] + 2 * np.sqrt(var_pred_test[:, 0]), alpha=0.3)
        if i==2: break  # only three axes are available

    plt.savefig("onep%d.png" % trial)
    plt.show()
    m_p = np.zeros((T, state_dim))
    S_p = np.zeros((T, state_dim, state_dim))
    m_init = X[0:1, 0:state_dim]
    S_init = np.diag(np.ones(state_dim) * 0.1)
    for h in range(T):
        m_h, S_h, _ = predict_trajectory_wrapper(pilco, m_init, S_init, h)
        m_p[h, :], S_p[h, :, :] = m_h[:], S_h[:, :]

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(12, 6))
    axes[0].set_title('Multi-step prediction - Trial#%d' % trial)
    axes[2].set_xlabel('t')
    axes[1].set_ylabel('x')
    for i in range(state_dim):
        axes[i].plot(range(T - 1), m_p[0:T - 1, i], X[1:T, i])  # can't use Y_new because it stores differences (Dx)
        axes[i].fill_between(range(T - 1),
                             m_p[0:T - 1, i] - 2 * np.sqrt(S_p[0:T - 1, i, i]),
                             m_p[0:T - 1, i] + 2 * np.sqrt(S_p[0:T - 1, i, i]), alpha=0.2)
        if i == 2: break  # only three axes are available

    plt.savefig("multistep%d.png" % trial)
    plt.show()

with tf.Session() as sess:
    p_start=time.time()
    target=np.array([1.2,0.38,0.38])
    env = EnvWrapper(target)
    T=50
    num_basis_functions = 50
    max_action = 0.1
    time_on_real_robot = 0
    # Initial random rollout provides the first training set.
    X,Y,t=rollout(env,T,random=True,trial=0)
    time_on_real_robot += t
    state_dim = Y.shape[1]
    control_dim = X.shape[1] - Y.shape[1]
    controller = RbfController(state_dim,control_dim, num_basis_functions, max_action)
    reward = ExponentialReward(3,t=target)
    pilco=PILCO(X,Y,controller=controller,reward=reward)
    plot(pilco,X,Y,T,0)
    n=4
    t_model=0
    t_policy=0
    for i in range(1,n):
        env.reset()
        t1 = time.time()
        pilco.optimize_models()
        t2 = time.time()
        t_model+=t2-t1
        print("model optimization done!")
        pilco.optimize_policy()
        t3 = time.time()
        t_policy+=t3-t2
        print("policy optimization done!")
        X_,Y_,t=rollout(env,T,trial=i)
        time_on_real_robot += t
        plot(pilco,X_,Y_,T,i)
        # NOTE(review): the [:2*T] slices keep the FIRST 2*T rows, so rows
        # appended once the buffer is full are dropped - confirm intended.
        X=np.vstack((X,X_[:T, :]))
        X=X[:2*T]
        Y=np.vstack((Y,Y_[:T, :]))
        Y=Y[:2*T]
        pilco.mgpr.set_XY(X,Y)
    print("t_robot= %.2f s" %time_on_real_robot)
    print("t_model= %.2f s" %t_model)
    print("t_policy= %.2f s" %t_policy)
    print("program running time = %d s" %(time.time()-p_start))


# ------------------------------------------------------------------------------
# Asset-listing entry that shares these source lines (kept verbatim as a comment)
# ------------------------------------------------------------------------------
# /test1/dist0.png:   (its URL is on the following listing line)
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/dist0.png -------------------------------------------------------------------------------- /test1/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/dist1.png -------------------------------------------------------------------------------- /test1/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/dist2.png -------------------------------------------------------------------------------- /test1/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/dist3.png -------------------------------------------------------------------------------- /test1/dist4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/dist4.png -------------------------------------------------------------------------------- /test1/dist5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/dist5.png -------------------------------------------------------------------------------- /test1/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/multistep0.png -------------------------------------------------------------------------------- 
/test1/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/multistep1.png -------------------------------------------------------------------------------- /test1/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/multistep2.png -------------------------------------------------------------------------------- /test1/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/multistep3.png -------------------------------------------------------------------------------- /test1/multistep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/multistep4.png -------------------------------------------------------------------------------- /test1/multistep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/multistep5.png -------------------------------------------------------------------------------- /test1/onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/onep0.png -------------------------------------------------------------------------------- /test1/onep1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/onep1.png -------------------------------------------------------------------------------- /test1/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/onep2.png -------------------------------------------------------------------------------- /test1/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/onep3.png -------------------------------------------------------------------------------- /test1/onep4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/onep4.png -------------------------------------------------------------------------------- /test1/onep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test1/onep5.png -------------------------------------------------------------------------------- /test2/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/dist0.png -------------------------------------------------------------------------------- /test2/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/dist1.png -------------------------------------------------------------------------------- 
/test2/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/dist2.png -------------------------------------------------------------------------------- /test2/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/dist3.png -------------------------------------------------------------------------------- /test2/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/multistep0.png -------------------------------------------------------------------------------- /test2/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/multistep1.png -------------------------------------------------------------------------------- /test2/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/multistep2.png -------------------------------------------------------------------------------- /test2/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/multistep3.png -------------------------------------------------------------------------------- /test2/onep0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/onep0.png -------------------------------------------------------------------------------- /test2/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/onep1.png -------------------------------------------------------------------------------- /test2/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/onep2.png -------------------------------------------------------------------------------- /test2/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/test2/onep3.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/20dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/20dist0.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/20dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/20dist1.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/20dist2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/20dist2.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/20dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/20dist3.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/20onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/20onep0.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/20onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/20onep1.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/20onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/20onep2.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/20onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/20onep3.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/multistep0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/multistep0.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/multistep1.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/multistep2.png -------------------------------------------------------------------------------- /unresponsive-paper-t25/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive-paper-t25/multistep3.png -------------------------------------------------------------------------------- /unresponsive.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | import tensorflow as tf 4 | import gpflow 5 | from gpflow import autoflow 6 | from gym.wrappers import Monitor 7 | 8 | from pilco import PILCO 9 | 10 | from controller import RbfController 11 | from reward import ExponentialReward 12 | import time 13 | import matplotlib 14 | import matplotlib.pyplot as plt 15 | 16 | float_type = gpflow.settings.dtypes.float_type 17 | 18 | np.random.seed(0) 19 | 20 | ## env -> rollout and expirement 21 | class EnvWrapper(): 22 | def __init__(self,target): 23 | self.env = 
gym.make('FetchReach-v1').env 24 | self.env.goal = target 25 | self.action_space = self.env.action_space 26 | self.observation_space = self.env.observation_space 27 | self.target=target 28 | self.distance_threshold=0.01 29 | self.q=[0.0,-1.0,0.0,2.0,0.0,0.5,0.0] 30 | 31 | def step(self, action): 32 | self.q+=action 33 | qpos = { 34 | 'robot0:shoulder_pan_joint': self.q[0], 35 | 'robot0:shoulder_lift_joint': self.q[1], 36 | 'robot0:upperarm_roll_joint': self.q[2], 37 | 'robot0:elbow_flex_joint': 2.0, 38 | 'robot0:forearm_roll_joint':self.q[4], 39 | 'robot0:wrist_flex_joint':self.q[5], 40 | 'robot0:wrist_roll_joint':self.q[6] 41 | } 42 | for name, value in qpos.items(): 43 | self.env.sim.data.set_joint_qpos(name, value) 44 | self.env.sim.forward() 45 | time.sleep(1 / 30) 46 | ob=self.env._get_obs() 47 | #print(ob) 48 | return np.concatenate((ob['achieved_goal'],self.q),axis=0) 49 | 50 | def reset(self): 51 | self.q = [0.0, -1.0, 0.0, 2.0, 0.0, 0.5, 0.0] 52 | qpos = { 53 | 'robot0:shoulder_pan_joint':0.0 , 54 | 'robot0:shoulder_lift_joint': -1.0, 55 | 'robot0:upperarm_roll_joint': 0.0, 56 | 'robot0:elbow_flex_joint': 2.0, 57 | 'robot0:forearm_roll_joint':0.0, 58 | 'robot0:wrist_flex_joint': 0.5, 59 | 'robot0:wrist_roll_joint': 0.0 60 | } 61 | for name, value in qpos.items(): 62 | self.env.sim.data.set_joint_qpos(name, value) 63 | self.env.goal = self.target 64 | self.env.sim.forward() 65 | time.sleep(1 / 30) 66 | ob = self.env._get_obs() 67 | return np.concatenate((ob['achieved_goal'],self.q),axis=0) 68 | 69 | def render(self): 70 | self.env.render() 71 | time.sleep(1/30) 72 | 73 | 74 | 75 | i=0 76 | def rollout(env,T, random=False,trial=0): 77 | start=time.time() 78 | X = [] 79 | Y = [] 80 | x=env.reset() 81 | tt=[] 82 | env.render() 83 | rewards=[] 84 | for t in range(T): 85 | if random: 86 | u = np.random.rand(7)*0.1-0.05 87 | else: 88 | u = pilco.compute_action(x[None, :])[0, :] 89 | new_x = env.step(u) 90 | tt.append(t) 91 | 
distance=np.linalg.norm(new_x[:3]-env.target) 92 | rewards.append(distance) 93 | env.render() 94 | X.append(np.hstack((x, u))) 95 | Y.append(new_x-x) 96 | x=new_x 97 | if np.linalg.norm(new_x[:3]-env.target) <0.05: 98 | break 99 | plt.plot(tt, rewards) 100 | plt.title("distance to goal - Trial %d" %trial) 101 | plt.xlabel("t") 102 | plt.ylabel("d") 103 | plt.savefig("dist%d.png"%trial) 104 | plt.show() 105 | end=time.time() 106 | print ("time on real robot= %.1f s"%(end-start)) 107 | return np.stack(X),np.stack(Y),end-start 108 | 109 | @autoflow((float_type,[None, None]), (float_type,[None, None])) 110 | def predict_one_step_wrapper(mgpr, m, s): 111 | return mgpr.predict_on_noisy_inputs(m, s) 112 | 113 | 114 | @autoflow((float_type,[None, None]), (float_type,[None, None]), (np.int32, [])) 115 | def predict_trajectory_wrapper(pilco, m, s, horizon): 116 | return pilco.predict(m, s, horizon) 117 | 118 | 119 | @autoflow((float_type,[None, None]), (float_type,[None, None])) 120 | def compute_action_wrapper(pilco, m, s): 121 | return pilco.controller.compute_action(m, s) 122 | 123 | 124 | @autoflow((float_type, [None, None]), (float_type, [None, None])) 125 | def reward_wrapper(reward, m, s): 126 | return reward.compute_reward(m, s) 127 | 128 | def plot(pilco,X,Y,T,trial): 129 | fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(12, 6)) 130 | axes[0].set_title('One step prediction - Trial#%d' % trial) 131 | axes[2].set_xlabel('t') 132 | axes[1].set_ylabel('x') 133 | for i, m in enumerate(pilco.mgpr.models): 134 | y_pred_test, var_pred_test = m.predict_y(X) 135 | axes[i].plot(range(len(y_pred_test)), y_pred_test, Y[:, i]) 136 | axes[i].fill_between(range(len(y_pred_test)), 137 | y_pred_test[:, 0] - 2 * np.sqrt(var_pred_test[:, 0]), 138 | y_pred_test[:, 0] + 2 * np.sqrt(var_pred_test[:, 0]), alpha=0.3) 139 | if i==2: break 140 | 141 | plt.savefig("onep%d.png" % trial) 142 | plt.show() 143 | m_p = np.zeros((T, state_dim)) 144 | S_p = np.zeros((T, state_dim, state_dim)) 145 
| m_init = X[0:1, 0:state_dim] 146 | S_init = np.diag(np.ones(state_dim) * 0.1) 147 | for h in range(T): 148 | m_h, S_h, _ = predict_trajectory_wrapper(pilco, m_init, S_init, h) 149 | m_p[h, :], S_p[h, :, :] = m_h[:], S_h[:, :] 150 | 151 | fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(12, 6)) 152 | axes[0].set_title('Multi-step prediction - Trial#%d' % trial) 153 | axes[2].set_xlabel('t') 154 | axes[1].set_ylabel('x') 155 | for i in range(state_dim): 156 | axes[i].plot(range(T - 1), m_p[0:T - 1, i], X[1:T, i]) # can't use Y_new because it stores differences (Dx) 157 | axes[i].fill_between(range(T - 1), 158 | m_p[0:T - 1, i] - 2 * np.sqrt(S_p[0:T - 1, i, i]), 159 | m_p[0:T - 1, i] + 2 * np.sqrt(S_p[0:T - 1, i, i]), alpha=0.2) 160 | if i == 2: break 161 | 162 | plt.savefig("multistep%d.png" % trial) 163 | plt.show() 164 | 165 | with tf.Session() as sess: 166 | p_start=time.time() 167 | target=np.array([1.2,0.38,0.38]) 168 | env = EnvWrapper(target) 169 | T=25 170 | num_basis_functions = 50 171 | max_action = 0.3 172 | time_on_real_robot = 0 173 | X,Y,t=rollout(env,T,random=True,trial=0) 174 | time_on_real_robot += t 175 | state_dim = Y.shape[1] 176 | control_dim = X.shape[1] - Y.shape[1] 177 | controller = RbfController(state_dim,control_dim, num_basis_functions, max_action) 178 | reward = ExponentialReward(3,t=target) 179 | pilco=PILCO(X,Y,controller=controller,reward=reward) 180 | plot(pilco,X,Y,T,0) 181 | n=4 182 | t_model=0 183 | t_policy=0 184 | for i in range(1,n): 185 | env.reset() 186 | t1 = time.time() 187 | pilco.optimize_models() 188 | t2 = time.time() 189 | t_model+=t2-t1 190 | print("model optimization done!") 191 | pilco.optimize_policy() 192 | t3 = time.time() 193 | t_policy+=t3-t2 194 | print("policy optimization done!") 195 | X_,Y_,t=rollout(env,T,trial=i) 196 | time_on_real_robot += t 197 | plot(pilco,X_,Y_,T,i) 198 | X=np.vstack((X,X_[:T, :])) 199 | X=X[:2*T] 200 | Y=np.vstack((Y,Y_[:T, :])) 201 | Y=Y[:2*T] 202 | pilco.mgpr.set_XY(X,Y) 203 | 
print("t_robot= %.2f s" %time_on_real_robot) 204 | print("t_model= %.2f s" %t_model) 205 | print("t_policy= %.2f s" %t_policy) 206 | print("program running time = %d s" %(time.time()-p_start)) -------------------------------------------------------------------------------- /unresponsive/20dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/20dist0.png -------------------------------------------------------------------------------- /unresponsive/20dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/20dist1.png -------------------------------------------------------------------------------- /unresponsive/20dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/20dist2.png -------------------------------------------------------------------------------- /unresponsive/20dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/20dist3.png -------------------------------------------------------------------------------- /unresponsive/20onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/20onep0.png -------------------------------------------------------------------------------- /unresponsive/20onep1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/20onep1.png -------------------------------------------------------------------------------- /unresponsive/20onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/20onep2.png -------------------------------------------------------------------------------- /unresponsive/20onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/20onep3.png -------------------------------------------------------------------------------- /unresponsive/Screenshot from 2019-05-03 21-03-56.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/Screenshot from 2019-05-03 21-03-56.png -------------------------------------------------------------------------------- /unresponsive/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/multistep0.png -------------------------------------------------------------------------------- /unresponsive/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/multistep1.png -------------------------------------------------------------------------------- /unresponsive/multistep2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/multistep2.png -------------------------------------------------------------------------------- /unresponsive/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive/multistep3.png -------------------------------------------------------------------------------- /unresponsive2-fai/dist0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/dist0.png -------------------------------------------------------------------------------- /unresponsive2-fai/dist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/dist1.png -------------------------------------------------------------------------------- /unresponsive2-fai/dist2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/dist2.png -------------------------------------------------------------------------------- /unresponsive2-fai/dist3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/dist3.png -------------------------------------------------------------------------------- /unresponsive2-fai/dist4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/dist4.png -------------------------------------------------------------------------------- /unresponsive2-fai/dist5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/dist5.png -------------------------------------------------------------------------------- /unresponsive2-fai/multistep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/multistep0.png -------------------------------------------------------------------------------- /unresponsive2-fai/multistep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/multistep1.png -------------------------------------------------------------------------------- /unresponsive2-fai/multistep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/multistep2.png -------------------------------------------------------------------------------- /unresponsive2-fai/multistep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/multistep3.png -------------------------------------------------------------------------------- /unresponsive2-fai/multistep4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/multistep4.png -------------------------------------------------------------------------------- /unresponsive2-fai/multistep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/multistep5.png -------------------------------------------------------------------------------- /unresponsive2-fai/onep0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/onep0.png -------------------------------------------------------------------------------- /unresponsive2-fai/onep1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/onep1.png -------------------------------------------------------------------------------- /unresponsive2-fai/onep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/onep2.png -------------------------------------------------------------------------------- /unresponsive2-fai/onep3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/onep3.png -------------------------------------------------------------------------------- /unresponsive2-fai/onep4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/onep4.png -------------------------------------------------------------------------------- /unresponsive2-fai/onep5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alonso94/Data-efficient-RL/5bc449059768d8e840e82629c2dd87cf522b34ef/unresponsive2-fai/onep5.png --------------------------------------------------------------------------------