├── README.md ├── .DS_Store ├── CartPole-v0 deep q learn.py ├── Continuous_Deep_Deterministic_Policy_Gradient_Net ├── DDPG Class ver2.ipynb └── DDPG Class ver2 (Pendulum-v0).ipynb └── Deep Q learning (Copy-v0).ipynb /README.md: -------------------------------------------------------------------------------- 1 | # OpenAI-gym-solutions 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lirnli/OpenAI-gym-solutions/HEAD/.DS_Store -------------------------------------------------------------------------------- /CartPole-v0 deep q learn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from matplotlib import pyplot as plt 4 | import gym 5 | 6 | # Get shape 7 | GYM_NAME = 'CartPole-v0' 8 | env = gym.make(GYM_NAME) 9 | obs_shape = env.observation_space.shape 10 | n_action = env.action_space.n 11 | env.close() 12 | 13 | def create_q_net(X, name=None): 14 | with tf.variable_scope(name) as scope: 15 | he_init = tf.contrib.layers.variance_scaling_initializer() 16 | xavier_init=tf.contrib.layers.xavier_initializer() 17 | # X = tf.placeholder(tf.float32, shape=(None,)+obs_shape) 18 | dense1 = tf.layers.dense(X,20,kernel_initializer=he_init, activation=tf.nn.elu) 19 | # dense1 = tf.layers.dense(X,20,activation=tf.nn.elu) 20 | dense2 = tf.layers.dense(dense1,10,kernel_initializer=he_init, activation=tf.nn.elu) # not used 21 | q_net = tf.layers.dense(dense1, n_action, kernel_initializer=xavier_init) 22 | trainable_vars = {var.name[len(scope.name):]: var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)} 23 | return q_net, trainable_vars 24 | 25 | tf.reset_default_graph() 26 | X = tf.placeholder(tf.float32, shape=(None,)+obs_shape) 27 | q_net, _ = create_q_net(X, name='q_network') 28 | 29 | from collections import deque 30 | learning_rate = 0.001 31 | gamma = 0.999 32 | memory_cap = 1000 33 | max_iteration= 100000 34 | batch_size = 100 35 | n_step = 1 36 | memory_warmup_size = memory_cap 37 | 38 | action_ph = tf.placeholder(tf.int32, shape=[None,]) 39 | qn_ph = tf.placeholder(tf.float32, shape=[None,]) 40 | q0 = tf.reduce_sum(q_net*tf.one_hot(action_ph,n_action),axis=1) 41 | loss = tf.reshape(tf.squared_difference(q0,qn_ph),[-1,1]) #CHANGED 42 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 43 | train_op = optimizer.minimize(loss) 44 | 45 | def epsilon_greedy(q_net_val, step): 46 | eps_min = 0.05 47 | eps_max = 1.0 48 | eps_decay_steps = 50000 49 | # epsilon = eps_min + step/eps_decay_steps*(eps_max-eps_min) 50 | epsilon = max(eps_min, eps_max - (eps_max-eps_min) * step/eps_decay_steps) 51 | if np.random.rand() < epsilon: 52 | return np.random.randint(n_action) 53 | return np.argmax(q_net_val) 54 | 55 | def check_model(): 56 | import time 57 | env = gym.make('CartPole-v0') 58 | obs = env.reset() 59 | step = 0 60 | while True: 61 | q_net_val = q_net.eval(feed_dict={X: np.reshape(obs,[-1,4])}) 62 | curr_action = epsilon_greedy(q_net_val,max_iteration) 63 | env.render() 64 | # time.sleep(0.1) 65 | obs, _,done,_ = env.step(curr_action) 66 | step += 1 67 | if done: 68 | break 69 | env.close() 70 | return step 71 | 72 | init = tf.global_variables_initializer() 73 | env = gym.make(GYM_NAME) 74 | from gym import wrappers 75 | env = wrappers.Monitor(env,'./tmp/',force=True) 76 | prev_obs = env.reset() 77 | prev_action = 
env.action_space.sample() 78 | memory = deque(maxlen=memory_cap) 79 | iteration = 0 80 | episode = 0 81 | train_step = 0 82 | config = tf.ConfigProto(inter_op_parallelism_threads=1, intra_op_parallelism_threads=1) 83 | with tf.Session(config=config) as sess: 84 | init.run() 85 | while train_step < 100000: 86 | print('\riteration {}, episode = {}, train_step {}'.format(iteration, episode, train_step),end='') 87 | obs, reward, done, _ = env.step(prev_action) 88 | q_net_val = q_net.eval(feed_dict = {X: np.expand_dims(obs,0)}) 89 | memory.append([prev_obs, prev_action, reward, np.max(q_net_val),done]) 90 | prev_obs, prev_action = obs, epsilon_greedy(q_net_val, train_step) # CHANGED 91 | if iteration > memory_warmup_size: # train 92 | idx = np.random.permutation(len(memory)-1)[:batch_size] 93 | X_batch = np.array([memory[b][0] for b in idx]) 94 | action_batch = np.array([memory[b][1] for b in idx]) 95 | reward_batch = np.array([memory[b][2] for b in idx]) 96 | q_batch = np.array([memory[b][3] for b in idx]) 97 | done_batch = np.array([memory[b][4] for b in idx]) 98 | qn_batch = reward_batch+(~done_batch)*q_batch*gamma 99 | train_op.run(feed_dict = {X:X_batch, action_ph:action_batch, qn_ph:qn_batch}) 100 | train_step += 1 101 | if done: 102 | prev_obs = env.reset() 103 | episode += 1 104 | if episode%100==0: 105 | check_model() 106 | iteration += 1 107 | env.close() -------------------------------------------------------------------------------- /Continuous_Deep_Deterministic_Policy_Gradient_Net/DDPG Class ver2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Deep Deterministic Policy Gradient\n", 8 | "Implementation follows: Continuous control with deep reinforcement learning (arXiv:1509.02971v5)\n", 9 | "- Memory replay\n", 10 | "- A3C-style asynchronous (soft) target net updates\n", 11 | "- Trained with a target net\n", 12 | "- Initial exploration policy is quite important to warm up the net" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 11, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "2017-08-26T20:05:05.197940\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "import datetime\n", 32 | "print(datetime.datetime.now().isoformat())" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 1, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import numpy as np\n", 44 | "import tensorflow as tf\n", 45 | "from functools import partial" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "class Actor(object):\n", 57 | " def __init__(self, n_observation, n_action, name='actor_net'):\n", 58 | " self.n_observation = n_observation\n", 59 | " self.n_action = n_action\n", 60 | " self.name = name\n", 61 | " self.sess = None\n", 62 | " self.build_model()\n", 63 | " self.build_train()\n", 64 | " \n", 65 | " def build_model(self):\n", 66 | " activation = tf.nn.elu\n", 67 | " kernel_initializer = tf.contrib.layers.variance_scaling_initializer()\n", 68 | " kernel_regularizer = tf.contrib.layers.l2_regularizer(0.1)\n", 69 | " default_dense = partial(tf.layers.dense,\\\n", 70 | " activation=activation,\\\n", 71 | " kernel_initializer=kernel_initializer,\\\n", 72 | " kernel_regularizer=kernel_regularizer)\n", 73 | " with 
tf.variable_scope(self.name) as scope:\n", 74 | " observation = tf.placeholder(tf.float32,shape=[None,self.n_observation])\n", 75 | " hid1 = default_dense(observation,32)\n", 76 | " hid2 = default_dense(hid1,64)\n", 77 | " action = default_dense(hid2,self.n_action,activation=tf.nn.tanh,use_bias=False)\n", 78 | " trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=self.name)\n", 79 | " self.observation,self.action,self.trainable_vars = observation,action,trainable_vars\n", 80 | " \n", 81 | " def build_train(self,learning_rate = 0.0001):\n", 82 | " with tf.variable_scope(self.name) as scope:\n", 83 | " action_grads = tf.placeholder(tf.float32,[None,self.n_action])\n", 84 | " var_grads = tf.gradients(self.action,self.trainable_vars,-action_grads)\n", 85 | " train_op = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(var_grads,self.trainable_vars))\n", 86 | " self.action_grads,self.train_op = action_grads,train_op\n", 87 | " \n", 88 | " def predict_action(self,obs_batch):\n", 89 | " return self.action.eval(session=self.sess,feed_dict={self.observation:obs_batch})\n", 90 | "\n", 91 | " def train(self,obs_batch,action_grads):\n", 92 | " batch_size = len(action_grads)\n", 93 | " self.train_op.run(session=self.sess,feed_dict={self.observation:obs_batch,self.action_grads:action_grads/batch_size})\n", 94 | " \n", 95 | " def set_session(self,sess):\n", 96 | " self.sess = sess\n", 97 | " \n", 98 | " def get_trainable_dict(self):\n", 99 | " return {var.name[len(self.name):]: var for var in self.trainable_vars}" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 3, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "class Critic(object):\n", 111 | " def __init__(self, n_observation, n_action, name='critic_net'):\n", 112 | " self.n_observation = n_observation\n", 113 | " self.n_action = n_action\n", 114 | " self.name = name\n", 115 | " self.sess = None\n", 116 | " self.build_model()\n", 117 | " self.build_train()\n", 118 | " \n", 119 | " def build_model(self):\n", 120 | " activation = tf.nn.elu\n", 121 | " kernel_initializer = tf.contrib.layers.variance_scaling_initializer()\n", 122 | " kernel_regularizer = tf.contrib.layers.l2_regularizer(0.1)\n", 123 | " default_dense = partial(tf.layers.dense,\\\n", 124 | " activation=activation,\\\n", 125 | " kernel_initializer=kernel_initializer,\\\n", 126 | " kernel_regularizer=kernel_regularizer)\n", 127 | " with tf.variable_scope(self.name) as scope:\n", 128 | " observation = tf.placeholder(tf.float32,shape=[None,self.n_observation])\n", 129 | " action = tf.placeholder(tf.float32,shape=[None,self.n_action])\n", 130 | " hid1 = default_dense(observation,32)\n", 131 | " hid2 = default_dense(action,32)\n", 132 | " hid3 = tf.concat([hid1,hid2],axis=1)\n", 133 | " hid4 = default_dense(hid3,128)\n", 134 | " Q = default_dense(hid4,1, activation=None)\n", 135 | " trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=self.name)\n", 136 | " self.observation,self.action,self.Q,self.trainable_vars= observation,action,Q,trainable_vars\n", 137 | " \n", 138 | " def build_train(self,learning_rate=0.001):\n", 139 | " with tf.variable_scope(self.name) as scope:\n", 140 | " Qexpected = tf.placeholder(tf.float32,shape=[None,1])\n", 141 | " loss = tf.losses.mean_squared_error(Qexpected,self.Q)\n", 142 | " optimizer = tf.train.AdamOptimizer(learning_rate)\n", 143 | " train_op = optimizer.minimize(loss)\n", 144 | " self.Qexpected,self.train_op = 
Qexpected,train_op\n", 145 | " self.action_grads = tf.gradients(self.Q,self.action)[0]\n", 146 | " \n", 147 | " def predict_Q(self,obs_batch,action_batch):\n", 148 | " return self.Q.eval(session=self.sess,\\\n", 149 | " feed_dict={self.observation:obs_batch,self.action:action_batch})\n", 150 | " \n", 151 | " def compute_action_grads(self,obs_batch,action_batch):\n", 152 | " return self.action_grads.eval(session=self.sess,\\\n", 153 | " feed_dict={self.observation:obs_batch,self.action:action_batch})\n", 154 | " def train(self,obs_batch,action_batch,Qexpected_batch):\n", 155 | " self.train_op.run(session=self.sess,\\\n", 156 | " feed_dict={self.observation:obs_batch,self.action:action_batch,self.Qexpected:Qexpected_batch})\n", 157 | " \n", 158 | " def set_session(self,sess):\n", 159 | " self.sess = sess\n", 160 | " \n", 161 | " def get_trainable_dict(self):\n", 162 | " return {var.name[len(self.name):]: var for var in self.trainable_vars}" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 4, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "class AsyncNets(object):\n", 174 | " def __init__(self,class_name):\n", 175 | " class_ = eval(class_name)\n", 176 | " self.net = class_(2,1,name=class_name)\n", 177 | " self.target_net = class_(2,1,name='{}_target'.format(class_name))\n", 178 | " self.TAU = tf.placeholder(tf.float32,shape=None)\n", 179 | " self.sess = None\n", 180 | " self.__build_async_assign()\n", 181 | " \n", 182 | " def __build_async_assign(self):\n", 183 | " net_dict = self.net.get_trainable_dict()\n", 184 | " target_net_dict = self.target_net.get_trainable_dict()\n", 185 | " keys = net_dict.keys()\n", 186 | " async_update_op = [target_net_dict[key].assign((1-self.TAU)*target_net_dict[key]+self.TAU*net_dict[key]) \\\n", 187 | " for key in keys]\n", 188 | " self.async_update_op = async_update_op\n", 189 | " \n", 190 | " def async_update(self,tau=0.01):\n", 191 | " self.sess.run(self.async_update_op,feed_dict={self.TAU:tau})\n", 192 | " \n", 193 | " def set_session(self,sess):\n", 194 | " self.sess = sess\n", 195 | " self.net.set_session(sess)\n", 196 | " self.target_net.set_session(sess)\n", 197 | " \n", 198 | " def get_subnets(self):\n", 199 | " return self.net, self.target_net\n", 200 | " \n", 201 | " " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 5, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "from collections import deque\n", 213 | "class Memory(object):\n", 214 | " def __init__(self,memory_size=10000):\n", 215 | " self.memory = deque(maxlen=memory_size)\n", 216 | " self.memory_size = memory_size\n", 217 | " \n", 218 | " def __len__(self):\n", 219 | " return len(self.memory)\n", 220 | " \n", 221 | " def append(self,item):\n", 222 | " self.memory.append(item)\n", 223 | " \n", 224 | " def sample_batch(self,batch_size=256):\n", 225 | " idx = np.random.permutation(len(self.memory))[:batch_size]\n", 226 | " return [self.memory[i] for i in idx]" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 6, 232 | "metadata": { 233 | "collapsed": true, 234 | "scrolled": false 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "def UONoise():\n", 239 | " theta = 0.15\n", 240 | " sigma = 0.2\n", 241 | " state = 0\n", 242 | " while True:\n", 243 | " yield state\n", 244 | " state += -theta*state+sigma*np.random.randn()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 9, 250 | 
"metadata": { 251 | "collapsed": false, 252 | "scrolled": false 253 | }, 254 | "outputs": [ 255 | { 256 | "name": "stderr", 257 | "output_type": "stream", 258 | "text": [ 259 | "[2017-08-26 19:41:54,989] Making new env: MountainCarContinuous-v0\n", 260 | "[2017-08-26 19:41:55,000] Finished writing results. You can upload them to the scoreboard via gym.upload('/Users/winter/Google Drive/handson-ml/tmp')\n", 261 | "[2017-08-26 19:41:55,008] Clearing 12 monitor files from previous run (because force=True was provided)\n", 262 | "[2017-08-26 19:41:55,010] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.2.955.video000000.mp4\n" 263 | ] 264 | }, 265 | { 266 | "name": "stdout", 267 | "output_type": "stream", 268 | "text": [ 269 | "iter 993, ep 0" 270 | ] 271 | }, 272 | { 273 | "name": "stderr", 274 | "output_type": "stream", 275 | "text": [ 276 | "[2017-08-26 19:42:12,100] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.2.955.video000001.mp4\n" 277 | ] 278 | }, 279 | { 280 | "name": "stdout", 281 | "output_type": "stream", 282 | "text": [ 283 | "iter 998, ep 0, score -16.821533, steps 999\n", 284 | "iter 1575, ep 1, score 90.513038, steps 577\n", 285 | "iter 2574, ep 2, score -13.971783, steps 999\n", 286 | "iter 3573, ep 3, score -12.553009, steps 999\n", 287 | "iter 4338, ep 4, score 88.878217, steps 765\n", 288 | "iter 5337, ep 5, score -14.499419, steps 999\n", 289 | "iter 6048, ep 6, score 91.099272, steps 711\n", 290 | "iter 6676, ep 7" 291 | ] 292 | }, 293 | { 294 | "name": "stderr", 295 | "output_type": "stream", 296 | "text": [ 297 | "[2017-08-26 19:42:55,373] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.2.955.video000008.mp4\n" 298 | ] 299 | }, 300 | { 301 | "name": "stdout", 302 | "output_type": "stream", 303 | "text": [ 304 | ", score 90.996675, steps 628\n", 305 | "iter 7675, ep 8, score -10.819755, steps 999\n", 306 | "iter 8674, ep 9, score -11.558511, steps 999\n", 307 | "iter 8979, ep 10, score 94.314570, steps 305\n", 308 | "iter 9978, ep 11, score -10.546130, steps 999\n", 309 | "iter 10977, ep 12, score -12.766908, steps 999\n", 310 | "iter 11976, ep 13, score -10.189042, steps 999\n", 311 | "iter 12975, ep 14, score -11.460311, steps 999\n", 312 | "iter 13974, ep 15, score -13.411151, steps 999\n", 313 | "iter 14973, ep 16, score -14.304451, steps 999\n", 314 | "iter 15676, ep 17, score 88.887782, steps 703\n", 315 | "iter 16242, ep 18, score 92.691063, steps 566\n", 316 | "iter 17241, ep 19, score -11.693453, steps 999\n", 317 | "iter 18023, ep 20, score 90.685539, steps 782\n", 318 | "iter 19022, ep 21, score -13.186908, steps 999\n", 319 | "iter 19463, ep 22, score 94.026987, steps 441\n", 320 | "iter 20462, ep 23, score -13.140392, steps 999\n", 321 | "iter 21063, ep 24, score 90.732156, steps 601\n", 322 | "iter 22062, ep 25, score -15.018492, steps 999\n", 323 | "iter 22621, ep 26" 324 | ] 325 | }, 326 | { 327 | "name": "stderr", 328 | "output_type": "stream", 329 | "text": [ 330 | "[2017-08-26 19:44:53,473] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.2.955.video000027.mp4\n" 331 | ] 332 | }, 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "iter 22644, ep 26, score 93.464829, steps 582\n", 338 | "iter 23408, ep 27, score 89.554092, steps 764\n", 339 | "iter 24091, ep 28, score 87.892311, steps 683\n", 340 | "iter 25005, ep 29, score 
85.951233, steps 914\n", 341 | "iter 26004, ep 30, score -12.744702, steps 999\n", 342 | "iter 26531, ep 31, score 94.225969, steps 527\n", 343 | "iter 27530, ep 32, score -16.797004, steps 999\n", 344 | "iter 28219, ep 33, score 89.064114, steps 689\n", 345 | "iter 28892, ep 34, score 85.746313, steps 673\n", 346 | "iter 29891, ep 35, score -17.804687, steps 999\n", 347 | "iter 30890, ep 36, score -20.038071, steps 999\n", 348 | "iter 31889, ep 37, score -21.388418, steps 999\n", 349 | "iter 32613, ep 38, score 86.763517, steps 724\n", 350 | "iter 32895, ep 39, score 94.907800, steps 282\n", 351 | "iter 33183, ep 40, score 96.955224, steps 288\n", 352 | "iter 33407, ep 41, score 96.408442, steps 224\n", 353 | "iter 33651, ep 42, score 95.072191, steps 244\n", 354 | "iter 33817, ep 43, score 97.042655, steps 166\n", 355 | "iter 34050, ep 44, score 95.092518, steps 233\n", 356 | "iter 34209, ep 45, score 97.400811, steps 159\n", 357 | "iter 34365, ep 46, score 97.076128, steps 156\n", 358 | "iter 34601, ep 47, score 94.900098, steps 236\n", 359 | "iter 34757, ep 48, score 96.290122, steps 156\n", 360 | "iter 35005, ep 49, score 95.847698, steps 248\n", 361 | "iter 35173, ep 50, score 96.491020, steps 168\n", 362 | "iter 35333, ep 51, score 95.923480, steps 160\n", 363 | "iter 35488, ep 52, score 95.869699, steps 155\n", 364 | "iter 35645, ep 53, score 96.014779, steps 157\n", 365 | "iter 35816, ep 54, score 95.952439, steps 171\n", 366 | "iter 35971, ep 55, score 95.244905, steps 155\n", 367 | "iter 36129, ep 56, score 95.436702, steps 158\n", 368 | "iter 36287, ep 57, score 94.857622, steps 158\n", 369 | "iter 36445, ep 58, score 94.044361, steps 158\n", 370 | "iter 36586, ep 59, score 95.303828, steps 141\n", 371 | "iter 36675, ep 60, score 96.390997, steps 89\n", 372 | "iter 36837, ep 61, score 94.398890, steps 162\n", 373 | "iter 36992, ep 62, score 93.246401, steps 155\n", 374 | "iter 37141, ep 63" 375 | ] 376 | }, 377 | { 378 | "name": "stderr", 379 | "output_type": "stream", 380 | "text": [ 381 | "[2017-08-26 19:46:36,338] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.2.955.video000064.mp4\n" 382 | ] 383 | }, 384 | { 385 | "name": "stdout", 386 | "output_type": "stream", 387 | "text": [ 388 | "iter 37144, ep 63, score 93.996951, steps 152\n", 389 | "iter 37291, ep 64, score 94.829838, steps 147\n", 390 | "iter 37467, ep 65, score 93.702264, steps 176\n", 391 | "iter 37644, ep 66, score 92.224951, steps 177\n", 392 | "iter 37794, ep 67, score 93.515279, steps 150\n", 393 | "iter 37950, ep 68, score 93.164297, steps 156\n", 394 | "iter 38083, ep 69, score 94.774690, steps 133\n", 395 | "iter 38235, ep 70, score 93.599692, steps 152\n", 396 | "iter 38354, ep 71, score 95.467911, steps 119\n", 397 | "iter 38484, ep 72, score 94.388090, steps 130\n", 398 | "iter 38558, ep 73, score 95.755785, steps 74\n", 399 | "iter 38706, ep 74, score 92.254909, steps 148\n", 400 | "iter 38783, ep 75, score 95.610128, steps 77\n", 401 | "iter 38864, ep 76, score 95.461576, steps 81\n", 402 | "iter 38967, ep 77, score 94.318447, steps 103\n", 403 | "iter 39102, ep 78, score 92.680197, steps 135\n", 404 | "iter 39179, ep 79, score 95.486308, steps 77\n", 405 | "iter 39318, ep 80, score 92.853924, steps 139\n", 406 | "iter 39394, ep 81, score 95.659714, steps 76\n", 407 | "iter 39465, ep 82, score 95.117314, steps 71\n", 408 | "iter 39547, ep 83, score 94.727829, steps 82\n", 409 | "iter 39629, ep 84, score 94.514347, steps 82\n", 410 | "iter 39704, ep 85, 
score 94.652798, steps 75\n", 411 | "iter 39777, ep 86, score 94.445660, steps 73\n", 412 | "iter 39858, ep 87, score 94.239863, steps 81\n", 413 | "iter 39929, ep 88, score 94.312136, steps 71\n", 414 | "iter 40001, ep 89, score 94.509007, steps 72\n", 415 | "iter 40072, ep 90, score 94.666801, steps 71\n", 416 | "iter 40145, ep 91, score 94.389765, steps 73\n", 417 | "iter 40214, ep 92, score 94.419837, steps 69\n", 418 | "iter 40282, ep 93, score 94.341991, steps 68\n", 419 | "iter 40355, ep 94, score 94.098858, steps 73\n", 420 | "iter 40423, ep 95, score 94.070725, steps 68\n", 421 | "iter 40491, ep 96, score 93.962244, steps 68\n", 422 | "iter 40558, ep 97, score 93.858663, steps 67\n", 423 | "iter 40627, ep 98, score 93.886947, steps 69\n", 424 | "iter 40694, ep 99, score 93.802138, steps 67\n", 425 | "iter 40760, ep 100, score 93.595257, steps 66\n", 426 | "iter 40857, ep 101, score 91.245881, steps 97\n", 427 | "iter 40923, ep 102, score 93.663442, steps 66\n", 428 | "iter 40989, ep 103, score 93.636351, steps 66\n", 429 | "iter 41055, ep 104, score 93.594846, steps 66\n", 430 | "iter 41149, ep 105, score 91.236285, steps 94\n", 431 | "iter 41215, ep 106, score 93.607248, steps 66\n", 432 | "iter 41280, ep 107, score 93.701786, steps 65\n", 433 | "iter 41345, ep 108, score 93.706605, steps 65\n", 434 | "iter 41411, ep 109, score 93.658946, steps 66\n", 435 | "iter 41505, ep 110, score 91.134111, steps 94\n", 436 | "iter 41572, ep 111, score 93.676209, steps 67\n", 437 | "iter 41667, ep 112, score 91.063218, steps 95\n", 438 | "iter 41733, ep 113, score 93.674531, steps 66\n", 439 | "iter 41827, ep 114, score 91.197605, steps 94\n", 440 | "iter 41893, ep 115, score 93.654271, steps 66\n", 441 | "iter 41958, ep 116, score 93.711481, steps 65\n", 442 | "iter 42024, ep 117, score 93.648411, steps 66\n", 443 | "iter 42089, ep 118, score 93.672089, steps 65\n", 444 | "iter 42159, ep 119, score 93.721641, steps 70\n", 445 | "iter 42225, ep 120, score 93.626316, steps 66\n", 446 | "iter 42318, ep 121, score 91.763871, steps 93\n", 447 | "iter 42383, ep 122, score 93.714648, steps 65\n", 448 | "iter 42478, ep 123, score 90.994213, steps 95\n", 449 | "iter 42560, ep 124" 450 | ] 451 | }, 452 | { 453 | "name": "stderr", 454 | "output_type": "stream", 455 | "text": [ 456 | "[2017-08-26 19:47:15,862] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.2.955.video000125.mp4\n" 457 | ] 458 | }, 459 | { 460 | "name": "stdout", 461 | "output_type": "stream", 462 | "text": [ 463 | "iter 42572, ep 124, score 91.147539, steps 94\n", 464 | "iter 42668, ep 125, score 90.983828, steps 96\n", 465 | "iter 42761, ep 126, score 91.405670, steps 93\n", 466 | "iter 42826, ep 127, score 93.698148, steps 65\n", 467 | "iter 42897, ep 128, score 93.416037, steps 71\n", 468 | "iter 42963, ep 129, score 93.638845, steps 66\n", 469 | "iter 43028, ep 130, score 93.683336, steps 65\n", 470 | "iter 43094, ep 131, score 93.612474, steps 66\n", 471 | "iter 43175, ep 132, score 93.375587, steps 81\n", 472 | "iter 43243, ep 133, score 93.515115, steps 68\n", 473 | "iter 43309, ep 134, score 93.638845, steps 66\n", 474 | "iter 43380, ep 135, score 93.605524, steps 71\n", 475 | "iter 43445, ep 136, score 93.657869, steps 65\n", 476 | "iter 43531, ep 137, score 92.299140, steps 86\n", 477 | "iter 43597, ep 138, score 93.641797, steps 66\n", 478 | "iter 43662, ep 139, score 93.683167, steps 65\n", 479 | "iter 43743, ep 140, score 93.438447, steps 81\n", 480 | "iter 43809, ep 141, 
score 93.611080, steps 66\n", 481 | "iter 43874, ep 142, score 93.664508, steps 65\n", 482 | "iter 43942, ep 143, score 93.549058, steps 68\n", 483 | "iter 44008, ep 144, score 93.622160, steps 66\n", 484 | "iter 44073, ep 145, score 93.656425, steps 65\n", 485 | "iter 44138, ep 146, score 93.673692, steps 65\n", 486 | "iter 44204, ep 147, score 93.635087, steps 66\n", 487 | "iter 44301, ep 148, score 91.656801, steps 97\n", 488 | "iter 44368, ep 149, score 93.572884, steps 67\n", 489 | "iter 44434, ep 150, score 93.599984, steps 66\n", 490 | "iter 44499, ep 151, score 93.671498, steps 65\n", 491 | "iter 44564, ep 152, score 93.672448, steps 65\n", 492 | "iter 44631, ep 153, score 93.563558, steps 67\n", 493 | "iter 44726, ep 154, score 91.479624, steps 95\n", 494 | "iter 44791, ep 155, score 93.668465, steps 65\n", 495 | "iter 44856, ep 156, score 93.668898, steps 65\n", 496 | "iter 44921, ep 157, score 93.654076, steps 65\n", 497 | "iter 44988, ep 158, score 93.559129, steps 67\n", 498 | "iter 45053, ep 159, score 93.662076, steps 65\n", 499 | "iter 45128, ep 160, score 93.459317, steps 75\n", 500 | "iter 45193, ep 161, score 93.648433, steps 65\n", 501 | "iter 45258, ep 162, score 93.650194, steps 65\n", 502 | "iter 45324, ep 163, score 93.586031, steps 66\n", 503 | "iter 45389, ep 164, score 93.643323, steps 65\n", 504 | "iter 45455, ep 165, score 93.605231, steps 66\n", 505 | "iter 45522, ep 166, score 93.527247, steps 67\n", 506 | "iter 45592, ep 167, score 93.562021, steps 70\n", 507 | "iter 45659, ep 168, score 93.544799, steps 67\n", 508 | "iter 45726, ep 169, score 93.528387, steps 67\n", 509 | "iter 45793, ep 170, score 93.544919, steps 67\n", 510 | "iter 45860, ep 171, score 93.532902, steps 67\n", 511 | "iter 45927, ep 172, score 93.545647, steps 67\n", 512 | "iter 45994, ep 173, score 93.458183, steps 67\n", 513 | "iter 46061, ep 174, score 93.438279, steps 67\n", 514 | "iter 46128, ep 175, score 93.464301, steps 67\n", 515 | "iter 46195, ep 176, score 93.507404, steps 67\n", 516 | "iter 46262, ep 177, score 93.439179, steps 67\n", 517 | "iter 46329, ep 178, score 93.486629, steps 67\n", 518 | "iter 46397, ep 179, score 93.448294, steps 68\n", 519 | "iter 46464, ep 180, score 93.465806, steps 67\n", 520 | "iter 46531, ep 181, score 93.452436, steps 67\n", 521 | "iter 46598, ep 182, score 93.476762, steps 67\n", 522 | "iter 46665, ep 183, score 93.457539, steps 67\n", 523 | "iter 46732, ep 184, score 93.503596, steps 67\n", 524 | "iter 46800, ep 185, score 93.333887, steps 68\n", 525 | "iter 46867, ep 186, score 93.435041, steps 67\n", 526 | "iter 46933, ep 187, score 93.528608, steps 66\n", 527 | "iter 46999, ep 188, score 93.544496, steps 66\n", 528 | "iter 47065, ep 189, score 93.591187, steps 66\n", 529 | "iter 47136, ep 190, score 93.541665, steps 71\n", 530 | "iter 47202, ep 191, score 93.583979, steps 66\n", 531 | "iter 47274, ep 192, score 93.592861, steps 72\n", 532 | "iter 47343, ep 193, score 93.404450, steps 69\n", 533 | "iter 47408, ep 194, score 93.641312, steps 65\n", 534 | "iter 47503, ep 195, score 90.889741, steps 95\n", 535 | "iter 47568, ep 196, score 93.641167, steps 65\n", 536 | "iter 47635, ep 197, score 93.505963, steps 67\n", 537 | "iter 47706, ep 198, score 93.425933, steps 71\n", 538 | "iter 47797, ep 199, score 91.760404, steps 91\n" 539 | ] 540 | }, 541 | { 542 | "name": "stderr", 543 | "output_type": "stream", 544 | "text": [ 545 | "[2017-08-26 19:47:55,111] Finished writing results. 
You can upload them to the scoreboard via gym.upload('/Users/winter/Google Drive/handson-ml/tmp')\n" 546 | ] 547 | } 548 | ], 549 | "source": [ 550 | "import gym\n", 551 | "from gym import wrappers\n", 552 | "max_episode = 200\n", 553 | "gamma = 0.99\n", 554 | "tau = 0.001\n", 555 | "memory_size = 10000\n", 556 | "batch_size = 256\n", 557 | "memory_warmup = batch_size*3\n", 558 | "max_explore_eps = 100\n", 559 | "save_path = 'DDPG_net_Class.ckpt'\n", 560 | "\n", 561 | "tf.reset_default_graph()\n", 562 | "actorAsync = AsyncNets('Actor')\n", 563 | "actor,actor_target = actorAsync.get_subnets()\n", 564 | "criticAsync = AsyncNets('Critic')\n", 565 | "critic,critic_target = criticAsync.get_subnets()\n", 566 | "\n", 567 | "init = tf.global_variables_initializer()\n", 568 | "saver = tf.train.Saver()\n", 569 | "with tf.Session() as sess:\n", 570 | " init.run()\n", 571 | " actorAsync.set_session(sess)\n", 572 | " criticAsync.set_session(sess)\n", 573 | " env = gym.make('MountainCarContinuous-v0')\n", 574 | " env = wrappers.Monitor(env,'./tmp/',force=True)\n", 575 | " obs = env.reset()\n", 576 | " iteration = 0\n", 577 | " episode = 0\n", 578 | " episode_score = 0\n", 579 | " episode_steps = 0\n", 580 | " noise = UONoise()\n", 581 | " memory = Memory(memory_size)\n", 582 | " while episode < max_episode:\n", 583 | " print('\\riter {}, ep {}'.format(iteration,episode),end='')\n", 584 | " action = actor.predict_action(np.reshape(obs,[1,-1]))[0]\n", 585 | " if episode < max_explore_eps: # mix in exploration noise while warming up\n", 586 | " p = episode/max_explore_eps\n", 587 | " action = action*p + (1-p)*next(noise)\n", 588 | " next_obs, reward, done, _ = env.step(action)\n", 589 | " memory.append([obs, action, reward, next_obs, done])\n", 590 | " if iteration >= memory_warmup:\n", 591 | " memory_batch = memory.sample_batch(batch_size)\n", 592 | " extract_mem = lambda k : np.array([item[k] for item in memory_batch])\n", 593 | " obs_batch = extract_mem(0)\n", 594 | " action_batch = extract_mem(1)\n", 595 | " reward_batch = extract_mem(2)\n", 596 | " next_obs_batch = extract_mem(3)\n", 597 | " done_batch = extract_mem(4)\n", 598 | " action_next = actor_target.predict_action(next_obs_batch)\n", 599 | " Q_next = critic_target.predict_Q(next_obs_batch,action_next)[:,0]\n", 600 | " Qexpected_batch = reward_batch + gamma*(1-done_batch)*Q_next # target Q value\n", 601 | " Qexpected_batch = np.reshape(Qexpected_batch,[-1,1])\n", 602 | " # train critic\n", 603 | " critic.train(obs_batch,action_batch,Qexpected_batch)\n", 604 | " # train actor\n", 605 | " action_grads = critic.compute_action_grads(obs_batch,action_batch)\n", 606 | " actor.train(obs_batch,action_grads)\n", 607 | " # async update\n", 608 | " actorAsync.async_update(tau)\n", 609 | " criticAsync.async_update(tau)\n", 610 | " episode_score += reward\n", 611 | " episode_steps += 1\n", 612 | " iteration += 1\n", 613 | " if done:\n", 614 | " print(', score {:8f}, steps {}'.format(episode_score,episode_steps))\n", 615 | "# if episode%5 == 0:\n", 616 | " \n", 617 | "# Q_check = \n", 618 | " obs = env.reset()\n", 619 | " episode += 1\n", 620 | " episode_score = 0\n", 621 | " episode_steps = 0\n", 622 | " noise = UONoise()\n", 623 | " if episode%25==0:\n", 624 | " saver.save(sess,save_path)\n", 625 | " else:\n", 626 | " obs = next_obs\n", 627 | "env.close()" 628 | ] 629 | } 630 | ], 631 | "metadata": { 632 | "anaconda-cloud": {}, 633 | "kernelspec": { 634 | "display_name": "Python [conda env:tensorflow]", 635 | "language": "python", 636 | "name": "conda-env-tensorflow-py" 637 | }, 638 | "language_info": { 639 | "codemirror_mode": { 640 | "name": "ipython", 641 | "version": 3 642 | }, 643 | "file_extension": ".py", 644 | "mimetype": "text/x-python", 645 | "name": "python", 646 | "nbconvert_exporter": "python", 647 | "pygments_lexer": "ipython3", 648 | 
"version": "3.6.2" 649 | } 650 | }, 651 | "nbformat": 4, 652 | "nbformat_minor": 2 653 | } 654 | -------------------------------------------------------------------------------- /Deep Q learning (Copy-v0).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# OpenAI gym: Copy-v0 with Simple Q-learning \n", 8 | "- Observation dim = [1] \n", 9 | "- Action dim = [2,2,5]. Use encoding 0-19\n", 10 | "- The rest is a standard Q learning with memory replay\n", 11 | "- Initial exploration is random. I found 100 episodes were not enough, so I used 1000 episodes instead." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "2017-08-27T00:30:05.059532\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import datetime\n", 31 | "print(datetime.datetime.now().isoformat())" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "import tensorflow as tf\n", 43 | "import numpy as np\n", 44 | "import gym\n", 45 | "from gym import wrappers" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": { 52 | "collapsed": false, 53 | "scrolled": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "tf.reset_default_graph()\n", 58 | "k_init = tf.contrib.layers.variance_scaling_initializer()\n", 59 | "X = tf.placeholder(tf.int32,shape=[None,1])\n", 60 | "hid1 = tf.reshape(tf.one_hot(X,6),[-1,6])\n", 61 | "hid2 = tf.layers.dense(hid1,64,activation=tf.nn.elu,kernel_initializer=k_init)\n", 62 | "hid3 = tf.layers.dense(hid2,128,activation=tf.nn.elu,kernel_initializer=k_init) # not used\n", 63 | "Q = tf.layers.dense(hid2,20)\n", 64 | "action_ph = tf.placeholder(tf.int32,shape=[None,])\n", 65 | "Q_action = tf.reduce_sum(Q*tf.one_hot(action_ph,20),axis=1,keep_dims=True)\n", 66 | "Qexpected = tf.placeholder(tf.float32,shape=[None,1])\n", 67 | "loss = tf.losses.mean_squared_error(Qexpected,Q_action)\n", 68 | "optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)\n", 69 | "train_op = optimizer.minimize(loss)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": { 76 | "collapsed": true 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "from collections import deque\n", 81 | "class Memory(object):\n", 82 | " def __init__(self,memory_size=10000):\n", 83 | " self.memory = deque(maxlen=memory_size)\n", 84 | " self.memory_size = memory_size\n", 85 | " \n", 86 | " def __len__(self):\n", 87 | " return len(self.memory)\n", 88 | " \n", 89 | " def append(self,item):\n", 90 | " self.memory.append(item)\n", 91 | " \n", 92 | " def sample_batch(self,batch_size=256):\n", 93 | " idx = np.random.permutation(len(self.memory))[:batch_size]\n", 94 | " return [self.memory[i] for i in idx]" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 5, 100 | "metadata": { 101 | "collapsed": false, 102 | "scrolled": false 103 | }, 104 | "outputs": [ 105 | { 106 | "name": "stderr", 107 | "output_type": "stream", 108 | "text": [ 109 | "[2017-08-27 00:30:10,892] Making new env: Copy-v0\n", 110 | "[2017-08-27 00:30:10,985] Clearing 18 monitor files from previous run (because force=True was provided)\n", 111 | "[2017-08-27 00:30:10,988] Starting new video recorder 
writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000000.json\n", 112 | "[2017-08-27 00:30:11,094] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000001.json\n", 113 | "[2017-08-27 00:30:11,099] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000008.json\n", 114 | "[2017-08-27 00:30:11,114] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000027.json\n", 115 | "[2017-08-27 00:30:11,140] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000064.json\n", 116 | "[2017-08-27 00:30:11,173] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000125.json\n", 117 | "[2017-08-27 00:30:11,227] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000216.json\n" 118 | ] 119 | }, 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "\r", 125 | "iter 0, ep 0\r", 126 | "iter 1, ep 0\r", 127 | "iter 2, ep 0\r", 128 | "iter 3, ep 1\r", 129 | "iter 4, ep 1\r", 130 | "iter 5, ep 1\r", 131 | "iter 6, ep 1\r", 132 | "iter 7, ep 1\r", 133 | "iter 8, ep 2\r", 134 | "iter 9, ep 2\r", 135 | "iter 10, ep 3\r", 136 | "iter 11, ep 4\r", 137 | "iter 12, ep 5\r", 138 | "iter 13, ep 6\r", 139 | "iter 14, ep 6\r", 140 | "iter 15, ep 7\r", 141 | "iter 16, ep 7\r", 142 | "iter 17, ep 8\r", 143 | "iter 18, ep 9\r", 144 | "iter 19, ep 9\r", 145 | "iter 20, ep 9\r", 146 | "iter 21, ep 10\r", 147 | "iter 22, ep 10\r", 148 | "iter 23, ep 10\r", 149 | "iter 24, ep 10\r", 150 | "iter 25, ep 11\r", 151 | "iter 26, ep 12\r", 152 | "iter 27, ep 12\r", 153 | "iter 28, ep 12\r", 154 | "iter 29, ep 12\r", 155 | "iter 30, ep 12\r", 156 | "iter 31, ep 12\r", 157 | "iter 32, ep 13\r", 158 | "iter 33, ep 13\r", 159 | "iter 34, ep 13\r", 160 | "iter 35, ep 13\r", 161 | "iter 36, ep 14\r", 162 | "iter 37, ep 14\r", 163 | "iter 38, ep 14\r", 164 | "iter 39, ep 14\r", 165 | "iter 40, ep 14\r", 166 | "iter 41, ep 14\r", 167 | "iter 42, ep 14\r", 168 | "iter 43, ep 15\r", 169 | "iter 44, ep 16\r", 170 | "iter 45, ep 16\r", 171 | "iter 46, ep 17\r", 172 | "iter 47, ep 17\r", 173 | "iter 48, ep 17\r", 174 | "iter 49, ep 17\r", 175 | "iter 50, ep 18\r", 176 | "iter 51, ep 18\r", 177 | "iter 52, ep 19\r", 178 | "iter 53, ep 19\r", 179 | "iter 54, ep 19\r", 180 | "iter 55, ep 19\r", 181 | "iter 56, ep 19\r", 182 | "iter 57, ep 19\r", 183 | "iter 58, ep 19\r", 184 | "iter 59, ep 19\r", 185 | "iter 60, ep 19\r", 186 | "iter 61, ep 20\r", 187 | "iter 62, ep 20\r", 188 | "iter 63, ep 21\r", 189 | "iter 64, ep 21\r", 190 | "iter 65, ep 22\r", 191 | "iter 66, ep 22\r", 192 | "iter 67, ep 23\r", 193 | "iter 68, ep 23\r", 194 | "iter 69, ep 23\r", 195 | "iter 70, ep 24\r", 196 | "iter 71, ep 24\r", 197 | "iter 72, ep 25\r", 198 | "iter 73, ep 25\r", 199 | "iter 74, ep 26\r", 200 | "iter 75, ep 26\r", 201 | "iter 76, ep 26\r", 202 | "iter 77, ep 27\r", 203 | "iter 78, ep 27\r", 204 | "iter 79, ep 27\r", 205 | "iter 80, ep 27\r", 206 | "iter 81, ep 27\r", 207 | "iter 82, ep 27\r", 208 | "iter 83, ep 27\r", 209 | "iter 84, ep 27\r", 210 | "iter 85, ep 28\r", 211 | "iter 86, ep 28\r", 212 | "iter 87, ep 29\r", 213 | "iter 88, ep 30\r", 214 | "iter 89, ep 30\r", 215 | "iter 90, ep 31\r", 216 | "iter 91, ep 32\r", 217 | "iter 92, ep 32\r", 218 | "iter 93, 
ep 32\r", 219 | "iter 94, ep 33\r", 220 | "iter 95, ep 34\r", 221 | "iter 96, ep 34\r", 222 | "iter 97, ep 35\r", 223 | "iter 98, ep 35\r", 224 | "iter 99, ep 35\r", 225 | "iter 100, ep 35\r", 226 | "iter 101, ep 35\r", 227 | "iter 102, ep 36\r", 228 | "iter 103, ep 36\r", 229 | "iter 104, ep 36\r", 230 | "iter 105, ep 36\r", 231 | "iter 106, ep 36\r", 232 | "iter 107, ep 36\r", 233 | "iter 108, ep 37\r", 234 | "iter 109, ep 38\r", 235 | "iter 110, ep 39\r", 236 | "iter 111, ep 40\r", 237 | "iter 112, ep 40\r", 238 | "iter 113, ep 40\r", 239 | "iter 114, ep 41\r", 240 | "iter 115, ep 41\r", 241 | "iter 116, ep 42\r", 242 | "iter 117, ep 42\r", 243 | "iter 118, ep 43\r", 244 | "iter 119, ep 44\r", 245 | "iter 120, ep 44\r", 246 | "iter 121, ep 44\r", 247 | "iter 122, ep 45\r", 248 | "iter 123, ep 45\r", 249 | "iter 124, ep 45\r", 250 | "iter 125, ep 46\r", 251 | "iter 126, ep 46\r", 252 | "iter 127, ep 46\r", 253 | "iter 128, ep 46\r", 254 | "iter 129, ep 46\r", 255 | "iter 130, ep 46\r", 256 | "iter 131, ep 47\r", 257 | "iter 132, ep 47\r", 258 | "iter 133, ep 47\r", 259 | "iter 134, ep 47\r", 260 | "iter 135, ep 48\r", 261 | "iter 136, ep 49\r", 262 | "iter 137, ep 50\r", 263 | "iter 138, ep 51\r", 264 | "iter 139, ep 51\r", 265 | "iter 140, ep 51\r", 266 | "iter 141, ep 51\r", 267 | "iter 142, ep 51\r", 268 | "iter 143, ep 51\r", 269 | "iter 144, ep 52\r", 270 | "iter 145, ep 52\r", 271 | "iter 146, ep 52\r", 272 | "iter 147, ep 53\r", 273 | "iter 148, ep 53\r", 274 | "iter 149, ep 53\r", 275 | "iter 150, ep 53\r", 276 | "iter 151, ep 53\r", 277 | "iter 152, ep 53\r", 278 | "iter 153, ep 53\r", 279 | "iter 154, ep 53\r", 280 | "iter 155, ep 54\r", 281 | "iter 156, ep 54\r", 282 | "iter 157, ep 54\r", 283 | "iter 158, ep 54\r", 284 | "iter 159, ep 54\r", 285 | "iter 160, ep 54\r", 286 | "iter 161, ep 55\r", 287 | "iter 162, ep 55\r", 288 | "iter 163, ep 55\r", 289 | "iter 164, ep 56\r", 290 | "iter 165, ep 57\r", 291 | "iter 166, ep 57\r", 292 | "iter 167, ep 58\r", 293 | "iter 168, ep 58\r", 294 | "iter 169, ep 58\r", 295 | "iter 170, ep 58\r", 296 | "iter 171, ep 58\r", 297 | "iter 172, ep 58\r", 298 | "iter 173, ep 58\r", 299 | "iter 174, ep 59\r", 300 | "iter 175, ep 60\r", 301 | "iter 176, ep 61\r", 302 | "iter 177, ep 62\r", 303 | "iter 178, ep 62\r", 304 | "iter 179, ep 62\r", 305 | "iter 180, ep 63\r", 306 | "iter 181, ep 63\r", 307 | "iter 182, ep 64\r", 308 | "iter 183, ep 64\r", 309 | "iter 184, ep 65\r", 310 | "iter 185, ep 66\r", 311 | "iter 186, ep 67\r", 312 | "iter 187, ep 67\r", 313 | "iter 188, ep 68\r", 314 | "iter 189, ep 69\r", 315 | "iter 190, ep 70\r", 316 | "iter 191, ep 70\r", 317 | "iter 192, ep 71\r", 318 | "iter 193, ep 71\r", 319 | "iter 194, ep 72\r", 320 | "iter 195, ep 72\r", 321 | "iter 196, ep 73\r", 322 | "iter 197, ep 74\r", 323 | "iter 198, ep 75\r", 324 | "iter 199, ep 76\r", 325 | "iter 200, ep 77\r", 326 | "iter 201, ep 78\r", 327 | "iter 202, ep 78\r", 328 | "iter 203, ep 78\r", 329 | "iter 204, ep 79\r", 330 | "iter 205, ep 80\r", 331 | "iter 206, ep 80\r", 332 | "iter 207, ep 80\r", 333 | "iter 208, ep 81\r", 334 | "iter 209, ep 82\r", 335 | "iter 210, ep 82\r", 336 | "iter 211, ep 82\r", 337 | "iter 212, ep 82\r", 338 | "iter 213, ep 82\r", 339 | "iter 214, ep 83\r", 340 | "iter 215, ep 83\r", 341 | "iter 216, ep 83\r", 342 | "iter 217, ep 83\r", 343 | "iter 218, ep 83\r", 344 | "iter 219, ep 83\r", 345 | "iter 220, ep 83\r", 346 | "iter 221, ep 84\r", 347 | "iter 222, ep 85\r", 348 | "iter 223, ep 86\r", 349 | "iter 224, ep 87\r", 350 | "iter 
225, ep 87\r", 351 | "iter 226, ep 87\r", 352 | "iter 227, ep 87\r", 353 | "iter 228, ep 88\r", 354 | "iter 229, ep 88\r", 355 | "iter 230, ep 88\r", 356 | "iter 231, ep 89\r", 357 | "iter 232, ep 89\r", 358 | "iter 233, ep 89\r", 359 | "iter 234, ep 90\r", 360 | "iter 235, ep 91\r", 361 | "iter 236, ep 91\r", 362 | "iter 237, ep 92\r", 363 | "iter 238, ep 92\r", 364 | "iter 239, ep 92\r", 365 | "iter 240, ep 92\r", 366 | "iter 241, ep 93\r", 367 | "iter 242, ep 93\r", 368 | "iter 243, ep 94\r", 369 | "iter 244, ep 95\r", 370 | "iter 245, ep 95\r", 371 | "iter 246, ep 96\r", 372 | "iter 247, ep 97\r", 373 | "iter 248, ep 97\r", 374 | "iter 249, ep 97\r", 375 | "iter 250, ep 97\r", 376 | "iter 251, ep 97\r", 377 | "iter 252, ep 97\r", 378 | "iter 253, ep 97\r", 379 | "iter 254, ep 98\r", 380 | "iter 255, ep 99\r", 381 | "iter 256, ep 100\r", 382 | "iter 257, ep 101\r", 383 | "iter 258, ep 101\r", 384 | "iter 259, ep 101\r", 385 | "iter 260, ep 102\r", 386 | "iter 261, ep 102\r", 387 | "iter 262, ep 102\r", 388 | "iter 263, ep 102\r", 389 | "iter 264, ep 103\r", 390 | "iter 265, ep 103\r", 391 | "iter 266, ep 103\r", 392 | "iter 267, ep 103\r", 393 | "iter 268, ep 104\r", 394 | "iter 269, ep 104\r", 395 | "iter 270, ep 104\r", 396 | "iter 271, ep 104\r", 397 | "iter 272, ep 104\r", 398 | "iter 273, ep 104\r", 399 | "iter 274, ep 105\r", 400 | "iter 275, ep 105\r", 401 | "iter 276, ep 105\r", 402 | "iter 277, ep 105\r", 403 | "iter 278, ep 106\r", 404 | "iter 279, ep 106\r", 405 | "iter 280, ep 106\r", 406 | "iter 281, ep 107\r", 407 | "iter 282, ep 107\r", 408 | "iter 283, ep 107\r", 409 | "iter 284, ep 107\r", 410 | "iter 285, ep 107\r", 411 | "iter 286, ep 108\r", 412 | "iter 287, ep 109\r", 413 | "iter 288, ep 109\r", 414 | "iter 289, ep 109\r", 415 | "iter 290, ep 109\r", 416 | "iter 291, ep 110\r", 417 | "iter 292, ep 111\r", 418 | "iter 293, ep 111\r", 419 | "iter 294, ep 111\r", 420 | "iter 295, ep 111\r", 421 | "iter 296, ep 112\r", 422 | "iter 297, ep 112\r", 423 | "iter 298, ep 112\r", 424 | "iter 299, ep 112\r", 425 | "iter 300, ep 112\r", 426 | "iter 301, ep 113\r", 427 | "iter 302, ep 114\r", 428 | "iter 303, ep 114\r", 429 | "iter 304, ep 114\r", 430 | "iter 305, ep 115\r", 431 | "iter 306, ep 115\r", 432 | "iter 307, ep 116\r", 433 | "iter 308, ep 117\r", 434 | "iter 309, ep 117\r", 435 | "iter 310, ep 117\r", 436 | "iter 311, ep 117\r", 437 | "iter 312, ep 117\r", 438 | "iter 313, ep 118\r", 439 | "iter 314, ep 119\r", 440 | "iter 315, ep 120\r", 441 | "iter 316, ep 120\r", 442 | "iter 317, ep 121\r", 443 | "iter 318, ep 122\r", 444 | "iter 319, ep 122\r", 445 | "iter 320, ep 123\r", 446 | "iter 321, ep 124\r", 447 | "iter 322, ep 124\r", 448 | "iter 323, ep 125\r", 449 | "iter 324, ep 126\r", 450 | "iter 325, ep 126\r", 451 | "iter 326, ep 126\r", 452 | "iter 327, ep 126\r", 453 | "iter 328, ep 126\r", 454 | "iter 329, ep 126\r", 455 | "iter 330, ep 126\r", 456 | "iter 331, ep 126\r", 457 | "iter 332, ep 127\r", 458 | "iter 333, ep 127\r", 459 | "iter 334, ep 127\r", 460 | "iter 335, ep 128\r", 461 | "iter 336, ep 129\r", 462 | "iter 337, ep 130\r", 463 | "iter 338, ep 131\r", 464 | "iter 339, ep 131\r", 465 | "iter 340, ep 131\r", 466 | "iter 341, ep 132\r", 467 | "iter 342, ep 132\r", 468 | "iter 343, ep 133\r", 469 | "iter 344, ep 133\r", 470 | "iter 345, ep 133\r", 471 | "iter 346, ep 133\r", 472 | "iter 347, ep 134\r", 473 | "iter 348, ep 135\r", 474 | "iter 349, ep 135\r", 475 | "iter 350, ep 136\r", 476 | "iter 351, ep 137\r", 477 | "iter 352, ep 137\r", 478 | "iter 
353, ep 137\r", 479 | "iter 354, ep 138\r", 480 | "iter 355, ep 139\r", 481 | "iter 356, ep 140\r", 482 | "iter 357, ep 140\r", 483 | "iter 358, ep 140\r", 484 | "iter 359, ep 140\r", 485 | "iter 360, ep 141\r", 486 | "iter 361, ep 141\r", 487 | "iter 362, ep 141\r", 488 | "iter 363, ep 141\r", 489 | "iter 364, ep 142\r", 490 | "iter 365, ep 142\r", 491 | "iter 366, ep 143\r", 492 | "iter 367, ep 144\r", 493 | "iter 368, ep 145\r", 494 | "iter 369, ep 145\r", 495 | "iter 370, ep 145\r", 496 | "iter 371, ep 146\r", 497 | "iter 372, ep 146\r", 498 | "iter 373, ep 147\r", 499 | "iter 374, ep 147\r", 500 | "iter 375, ep 148\r", 501 | "iter 376, ep 149\r", 502 | "iter 377, ep 150\r", 503 | "iter 378, ep 150\r", 504 | "iter 379, ep 151\r", 505 | "iter 380, ep 151\r", 506 | "iter 381, ep 151\r", 507 | "iter 382, ep 151\r", 508 | "iter 383, ep 151\r", 509 | "iter 384, ep 151\r", 510 | "iter 385, ep 151\r", 511 | "iter 386, ep 151\r", 512 | "iter 387, ep 151\r", 513 | "iter 388, ep 151\r", 514 | "iter 389, ep 152\r", 515 | "iter 390, ep 152\r", 516 | "iter 391, ep 153\r", 517 | "iter 392, ep 154\r", 518 | "iter 393, ep 155\r", 519 | "iter 394, ep 155\r", 520 | "iter 395, ep 155\r", 521 | "iter 396, ep 155\r", 522 | "iter 397, ep 155\r", 523 | "iter 398, ep 155\r", 524 | "iter 399, ep 156\r", 525 | "iter 400, ep 157\r", 526 | "iter 401, ep 157\r", 527 | "iter 402, ep 158\r", 528 | "iter 403, ep 159\r", 529 | "iter 404, ep 159\r", 530 | "iter 405, ep 160\r", 531 | "iter 406, ep 161\r", 532 | "iter 407, ep 162\r", 533 | "iter 408, ep 163\r", 534 | "iter 409, ep 163\r", 535 | "iter 410, ep 163\r", 536 | "iter 411, ep 163\r", 537 | "iter 412, ep 163\r", 538 | "iter 413, ep 164\r", 539 | "iter 414, ep 165\r", 540 | "iter 415, ep 166\r", 541 | "iter 416, ep 167\r", 542 | "iter 417, ep 167\r", 543 | "iter 418, ep 168\r", 544 | "iter 419, ep 169\r", 545 | "iter 420, ep 169\r", 546 | "iter 421, ep 169\r", 547 | "iter 422, ep 170\r", 548 | "iter 423, ep 171\r", 549 | "iter 424, ep 171\r", 550 | "iter 425, ep 171\r", 551 | "iter 426, ep 172\r", 552 | "iter 427, ep 173\r", 553 | "iter 428, ep 174\r", 554 | "iter 429, ep 175\r", 555 | "iter 430, ep 175\r", 556 | "iter 431, ep 175\r", 557 | "iter 432, ep 175\r", 558 | "iter 433, ep 175\r", 559 | "iter 434, ep 176\r", 560 | "iter 435, ep 176\r", 561 | "iter 436, ep 177\r", 562 | "iter 437, ep 178\r", 563 | "iter 438, ep 179\r", 564 | "iter 439, ep 179\r", 565 | "iter 440, ep 179\r", 566 | "iter 441, ep 180\r", 567 | "iter 442, ep 180\r", 568 | "iter 443, ep 180\r", 569 | "iter 444, ep 181\r", 570 | "iter 445, ep 181\r", 571 | "iter 446, ep 181\r", 572 | "iter 447, ep 181\r", 573 | "iter 448, ep 182\r", 574 | "iter 449, ep 183\r", 575 | "iter 450, ep 183\r", 576 | "iter 451, ep 184\r", 577 | "iter 452, ep 185\r", 578 | "iter 453, ep 186\r", 579 | "iter 454, ep 186\r", 580 | "iter 455, ep 186\r", 581 | "iter 456, ep 186\r", 582 | "iter 457, ep 186\r", 583 | "iter 458, ep 187\r", 584 | "iter 459, ep 187\r", 585 | "iter 460, ep 188\r", 586 | "iter 461, ep 189\r", 587 | "iter 462, ep 189\r", 588 | "iter 463, ep 189\r", 589 | "iter 464, ep 190\r", 590 | "iter 465, ep 191\r", 591 | "iter 466, ep 191\r", 592 | "iter 467, ep 191\r", 593 | "iter 468, ep 191\r", 594 | "iter 469, ep 191\r", 595 | "iter 470, ep 191\r", 596 | "iter 471, ep 191\r", 597 | "iter 472, ep 192\r", 598 | "iter 473, ep 192\r", 599 | "iter 474, ep 192\r", 600 | "iter 475, ep 193\r", 601 | "iter 476, ep 193\r", 602 | "iter 477, ep 193\r", 603 | "iter 478, ep 193\r", 604 | "iter 479, ep 193\r", 605 | 
"iter 480, ep 194\r", 606 | "iter 481, ep 194\r", 607 | "iter 482, ep 195\r", 608 | "iter 483, ep 196\r", 609 | "iter 484, ep 196\r", 610 | "iter 485, ep 196\r", 611 | "iter 486, ep 196\r", 612 | "iter 487, ep 196\r", 613 | "iter 488, ep 197\r", 614 | "iter 489, ep 197\r", 615 | "iter 490, ep 198\r", 616 | "iter 491, ep 198\r", 617 | "iter 492, ep 198\r", 618 | "iter 493, ep 199\r", 619 | "iter 494, ep 199\r", 620 | "iter 495, ep 199\r", 621 | "iter 496, ep 200\r", 622 | "iter 497, ep 200\r", 623 | "iter 498, ep 201\r", 624 | "iter 499, ep 202\r", 625 | "iter 500, ep 202\r", 626 | "iter 501, ep 203\r", 627 | "iter 502, ep 203\r", 628 | "iter 503, ep 203\r", 629 | "iter 504, ep 204\r", 630 | "iter 505, ep 205\r", 631 | "iter 506, ep 205\r", 632 | "iter 507, ep 205\r", 633 | "iter 508, ep 205\r", 634 | "iter 509, ep 206\r", 635 | "iter 510, ep 206\r", 636 | "iter 511, ep 206\r", 637 | "iter 512, ep 206\r", 638 | "iter 513, ep 206\r", 639 | "iter 514, ep 207\r", 640 | "iter 515, ep 207\r", 641 | "iter 516, ep 208\r", 642 | "iter 517, ep 208\r", 643 | "iter 518, ep 209\r", 644 | "iter 519, ep 209\r", 645 | "iter 520, ep 210\r", 646 | "iter 521, ep 210\r", 647 | "iter 522, ep 210\r", 648 | "iter 523, ep 211\r", 649 | "iter 524, ep 211\r", 650 | "iter 525, ep 211\r", 651 | "iter 526, ep 211\r", 652 | "iter 527, ep 211\r", 653 | "iter 528, ep 211\r", 654 | "iter 529, ep 211\r", 655 | "iter 530, ep 211\r", 656 | "iter 531, ep 211\r", 657 | "iter 532, ep 212\r", 658 | "iter 533, ep 213\r", 659 | "iter 534, ep 213\r", 660 | "iter 535, ep 214\r", 661 | "iter 536, ep 214\r", 662 | "iter 537, ep 215\r", 663 | "iter 538, ep 216\r", 664 | "iter 539, ep 217\r", 665 | "iter 540, ep 218\r", 666 | "iter 541, ep 219\r", 667 | "iter 542, ep 219\r", 668 | "iter 543, ep 219\r", 669 | "iter 544, ep 220\r", 670 | "iter 545, ep 220\r", 671 | "iter 546, ep 220\r", 672 | "iter 547, ep 220\r", 673 | "iter 548, ep 220\r", 674 | "iter 549, ep 220\r", 675 | "iter 550, ep 220\r", 676 | "iter 551, ep 220\r", 677 | "iter 552, ep 221\r", 678 | "iter 553, ep 221\r", 679 | "iter 554, ep 222\r", 680 | "iter 555, ep 222\r", 681 | "iter 556, ep 223\r", 682 | "iter 557, ep 223\r", 683 | "iter 558, ep 223\r", 684 | "iter 559, ep 223\r", 685 | "iter 560, ep 224\r", 686 | "iter 561, ep 225\r", 687 | "iter 562, ep 226\r", 688 | "iter 563, ep 226\r", 689 | "iter 564, ep 226\r", 690 | "iter 565, ep 226\r", 691 | "iter 566, ep 227\r", 692 | "iter 567, ep 228\r", 693 | "iter 568, ep 228\r", 694 | "iter 569, ep 229\r", 695 | "iter 570, ep 229\r", 696 | "iter 571, ep 229\r", 697 | "iter 572, ep 230\r", 698 | "iter 573, ep 230\r", 699 | "iter 574, ep 231\r", 700 | "iter 575, ep 231\r", 701 | "iter 576, ep 232\r", 702 | "iter 577, ep 232\r", 703 | "iter 578, ep 233\r", 704 | "iter 579, ep 234\r", 705 | "iter 580, ep 234\r", 706 | "iter 581, ep 235\r", 707 | "iter 582, ep 235\r", 708 | "iter 583, ep 236\r", 709 | "iter 584, ep 237\r", 710 | "iter 585, ep 237\r", 711 | "iter 586, ep 237\r", 712 | "iter 587, ep 237\r", 713 | "iter 588, ep 237\r", 714 | "iter 589, ep 237\r", 715 | "iter 590, ep 237\r", 716 | "iter 591, ep 237\r", 717 | "iter 592, ep 237\r", 718 | "iter 593, ep 238\r", 719 | "iter 594, ep 238\r", 720 | "iter 595, ep 239\r", 721 | "iter 596, ep 239\r", 722 | "iter 597, ep 240\r", 723 | "iter 598, ep 240\r", 724 | "iter 599, ep 241\r", 725 | "iter 600, ep 242\r", 726 | "iter 601, ep 242\r", 727 | "iter 602, ep 243\r", 728 | "iter 603, ep 243\r", 729 | "iter 604, ep 244\r", 730 | "iter 605, ep 244\r", 731 | "iter 606, ep 244\r", 732 
| "iter 607, ep 244\r", 733 | "iter 608, ep 245\r", 734 | "iter 609, ep 245\r", 735 | "iter 610, ep 246\r", 736 | "iter 611, ep 247\r", 737 | "iter 612, ep 247\r", 738 | "iter 613, ep 248\r", 739 | "iter 614, ep 249\r", 740 | "iter 615, ep 250\r", 741 | "iter 616, ep 251\r", 742 | "iter 617, ep 251\r", 743 | "iter 618, ep 251\r", 744 | "iter 619, ep 251\r", 745 | "iter 620, ep 251\r", 746 | "iter 621, ep 251\r", 747 | "iter 622, ep 252\r", 748 | "iter 623, ep 252\r", 749 | "iter 624, ep 252\r", 750 | "iter 625, ep 253\r", 751 | "iter 626, ep 254\r", 752 | "iter 627, ep 254\r", 753 | "iter 628, ep 255\r", 754 | "iter 629, ep 256\r", 755 | "iter 630, ep 257\r", 756 | "iter 631, ep 257\r", 757 | "iter 632, ep 258\r", 758 | "iter 633, ep 259\r", 759 | "iter 634, ep 259\r", 760 | "iter 635, ep 260\r", 761 | "iter 636, ep 261\r", 762 | "iter 637, ep 261\r", 763 | "iter 638, ep 262\r", 764 | "iter 639, ep 263\r", 765 | "iter 640, ep 264\r", 766 | "iter 641, ep 265\r", 767 | "iter 642, ep 266\r", 768 | "iter 643, ep 266\r", 769 | "iter 644, ep 267\r", 770 | "iter 645, ep 267\r", 771 | "iter 646, ep 268\r", 772 | "iter 647, ep 269\r", 773 | "iter 648, ep 269\r", 774 | "iter 649, ep 270\r", 775 | "iter 650, ep 270\r", 776 | "iter 651, ep 270\r", 777 | "iter 652, ep 270\r", 778 | "iter 653, ep 271\r", 779 | "iter 654, ep 271\r", 780 | "iter 655, ep 272\r", 781 | "iter 656, ep 272\r", 782 | "iter 657, ep 272\r", 783 | "iter 658, ep 272\r", 784 | "iter 659, ep 273\r", 785 | "iter 660, ep 273\r", 786 | "iter 661, ep 273\r", 787 | "iter 662, ep 273\r", 788 | "iter 663, ep 273\r", 789 | "iter 664, ep 273\r", 790 | "iter 665, ep 274\r", 791 | "iter 666, ep 275\r", 792 | "iter 667, ep 275\r", 793 | "iter 668, ep 275\r", 794 | "iter 669, ep 275\r", 795 | "iter 670, ep 276\r", 796 | "iter 671, ep 276\r", 797 | "iter 672, ep 276\r", 798 | "iter 673, ep 276\r", 799 | "iter 674, ep 277\r", 800 | "iter 675, ep 278\r", 801 | "iter 676, ep 278\r", 802 | "iter 677, ep 278\r", 803 | "iter 678, ep 279\r", 804 | "iter 679, ep 279\r", 805 | "iter 680, ep 280\r", 806 | "iter 681, ep 280\r", 807 | "iter 682, ep 281\r", 808 | "iter 683, ep 281\r", 809 | "iter 684, ep 281\r", 810 | "iter 685, ep 282\r", 811 | "iter 686, ep 283\r", 812 | "iter 687, ep 283\r", 813 | "iter 688, ep 284\r", 814 | "iter 689, ep 284\r", 815 | "iter 690, ep 284\r", 816 | "iter 691, ep 284\r", 817 | "iter 692, ep 285\r", 818 | "iter 693, ep 285\r", 819 | "iter 694, ep 286\r", 820 | "iter 695, ep 286\r", 821 | "iter 696, ep 286\r", 822 | "iter 697, ep 286\r", 823 | "iter 698, ep 286\r", 824 | "iter 699, ep 286\r", 825 | "iter 700, ep 287\r", 826 | "iter 701, ep 287\r", 827 | "iter 702, ep 288\r", 828 | "iter 703, ep 288\r", 829 | "iter 704, ep 288\r", 830 | "iter 705, ep 289\r", 831 | "iter 706, ep 290\r", 832 | "iter 707, ep 290\r", 833 | "iter 708, ep 291\r", 834 | "iter 709, ep 292\r", 835 | "iter 710, ep 292\r", 836 | "iter 711, ep 293\r", 837 | "iter 712, ep 293\r", 838 | "iter 713, ep 294\r", 839 | "iter 714, ep 294\r", 840 | "iter 715, ep 295\r", 841 | "iter 716, ep 295\r", 842 | "iter 717, ep 295\r", 843 | "iter 718, ep 295\r", 844 | "iter 719, ep 296\r", 845 | "iter 720, ep 297\r", 846 | "iter 721, ep 298\r", 847 | "iter 722, ep 299\r", 848 | "iter 723, ep 299\r", 849 | "iter 724, ep 299\r", 850 | "iter 725, ep 299\r", 851 | "iter 726, ep 299\r", 852 | "iter 727, ep 299\r", 853 | "iter 728, ep 300\r", 854 | "iter 729, ep 300\r", 855 | "iter 730, ep 301\r", 856 | "iter 731, ep 301\r", 857 | "iter 732, ep 301\r", 858 | "iter 733, ep 301\r", 
859 | "iter 734, ep 301\r", 860 | "iter 735, ep 301\r", 861 | "iter 736, ep 302\r", 862 | "iter 737, ep 303\r", 863 | "iter 738, ep 303\r", 864 | "iter 739, ep 304\r", 865 | "iter 740, ep 304\r", 866 | "iter 741, ep 304\r", 867 | "iter 742, ep 304\r", 868 | "iter 743, ep 304\r", 869 | "iter 744, ep 305\r", 870 | "iter 745, ep 306\r", 871 | "iter 746, ep 306\r", 872 | "iter 747, ep 307\r", 873 | "iter 748, ep 307\r", 874 | "iter 749, ep 308\r", 875 | "iter 750, ep 308\r", 876 | "iter 751, ep 308\r", 877 | "iter 752, ep 309\r", 878 | "iter 753, ep 310\r", 879 | "iter 754, ep 310\r", 880 | "iter 755, ep 310\r", 881 | "iter 756, ep 311\r", 882 | "iter 757, ep 312\r", 883 | "iter 758, ep 312\r", 884 | "iter 759, ep 313\r", 885 | "iter 760, ep 313\r", 886 | "iter 761, ep 313\r", 887 | "iter 762, ep 313\r", 888 | "iter 763, ep 313\r", 889 | "iter 764, ep 314\r", 890 | "iter 765, ep 315\r", 891 | "iter 766, ep 316\r", 892 | "iter 767, ep 316\r", 893 | "iter 768, ep 316\r", 894 | "iter 769, ep 316" 895 | ] 896 | }, 897 | { 898 | "name": "stderr", 899 | "output_type": "stream", 900 | "text": [ 901 | "[2017-08-27 00:30:11,433] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000343.json\n" 902 | ] 903 | }, 904 | { 905 | "name": "stdout", 906 | "output_type": "stream", 907 | "text": [ 908 | "iter 1018, ep 400, ep reward 0.5, ep steps 8\n", 909 | "iter 1361, ep 495" 910 | ] 911 | }, 912 | { 913 | "name": "stderr", 914 | "output_type": "stream", 915 | "text": [ 916 | "[2017-08-27 00:30:12,899] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000512.json\n" 917 | ] 918 | }, 919 | { 920 | "name": "stdout", 921 | "output_type": "stream", 922 | "text": [ 923 | "iter 1383, ep 500, ep reward -0.5, ep steps 1\n", 924 | "iter 1780, ep 600, ep reward -0.5, ep steps 6\n", 925 | "iter 2130, ep 700, ep reward 0.5, ep steps 4\n", 926 | "iter 2213, ep 722" 927 | ] 928 | }, 929 | { 930 | "name": "stderr", 931 | "output_type": "stream", 932 | "text": [ 933 | "[2017-08-27 00:30:14,838] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000729.json\n" 934 | ] 935 | }, 936 | { 937 | "name": "stdout", 938 | "output_type": "stream", 939 | "text": [ 940 | "iter 2530, ep 800, ep reward -0.5, ep steps 4\n", 941 | "iter 2981, ep 900, ep reward -1.0, ep steps 11\n", 942 | "iter 3410, ep 989" 943 | ] 944 | }, 945 | { 946 | "name": "stderr", 947 | "output_type": "stream", 948 | "text": [ 949 | "[2017-08-27 00:30:17,720] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video001000.json\n" 950 | ] 951 | }, 952 | { 953 | "name": "stdout", 954 | "output_type": "stream", 955 | "text": [ 956 | "iter 3466, ep 1000, ep reward 3.0, ep steps 3\n", 957 | "iter 3911, ep 1100, ep reward -0.5, ep steps 2\n", 958 | "iter 4298, ep 1200, ep reward 0.5, ep steps 3\n", 959 | "iter 4730, ep 1300, ep reward -1.0, ep steps 9\n", 960 | "iter 5099, ep 1400, ep reward -0.5, ep steps 3\n", 961 | "iter 5484, ep 1500, ep reward 0.5, ep steps 3\n", 962 | "iter 5900, ep 1600, ep reward 4.0, ep steps 4\n", 963 | "iter 6333, ep 1700, ep reward 0.5, ep steps 3\n", 964 | "iter 6775, ep 1800, ep reward 2.0, ep steps 13\n", 965 | "iter 7154, ep 1900, ep reward 5.0, ep steps 5\n", 966 | "iter 7946, ep 1996" 967 | ] 968 | }, 969 | { 970 | "name": "stderr", 971 | "output_type": "stream", 972 | "text": [ 973 | "[2017-08-27 00:30:29,364] 
Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video002000.json\n" 974 | ] 975 | }, 976 | { 977 | "name": "stdout", 978 | "output_type": "stream", 979 | "text": [ 980 | "iter 7983, ep 2000, ep reward -0.5, ep steps 2\n", 981 | "iter 8536, ep 2100, ep reward -0.5, ep steps 3\n", 982 | "iter 8932, ep 2200, ep reward 5.5, ep steps 8\n", 983 | "iter 9557, ep 2300, ep reward 14.0, ep steps 14\n", 984 | "iter 11332, ep 2400, ep reward 24.0, ep steps 24\n", 985 | "iter 13512, ep 2500, ep reward 0.5, ep steps 3\n", 986 | "iter 14469, ep 2600, ep reward 4.5, ep steps 7\n", 987 | "iter 16248, ep 2700, ep reward 31.0, ep steps 31\n", 988 | "iter 19349, ep 2800, ep reward 32.0, ep steps 32\n", 989 | "iter 22447, ep 2900, ep reward 32.0, ep steps 32\n", 990 | "iter 25479, ep 2998" 991 | ] 992 | }, 993 | { 994 | "name": "stderr", 995 | "output_type": "stream", 996 | "text": [ 997 | "[2017-08-27 00:31:17,244] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video003000.json\n" 998 | ] 999 | }, 1000 | { 1001 | "name": "stdout", 1002 | "output_type": "stream", 1003 | "text": [ 1004 | "iter 25543, ep 3000, ep reward 32.0, ep steps 32\n", 1005 | "iter 28633, ep 3100, ep reward 32.0, ep steps 32\n", 1006 | "iter 31742, ep 3200, ep reward 32.0, ep steps 32\n", 1007 | "iter 34853, ep 3300, ep reward 30.0, ep steps 30\n", 1008 | "iter 37933, ep 3400, ep reward 30.0, ep steps 30\n", 1009 | "iter 41039, ep 3500, ep reward 31.0, ep steps 31\n", 1010 | "iter 44139, ep 3600, ep reward 32.0, ep steps 32\n", 1011 | "iter 47246, ep 3700, ep reward 32.0, ep steps 32\n", 1012 | "iter 50344, ep 3800, ep reward 31.0, ep steps 31\n", 1013 | "iter 53448, ep 3900, ep reward 31.0, ep steps 31\n", 1014 | "iter 56517, ep 3999" 1015 | ] 1016 | }, 1017 | { 1018 | "name": "stderr", 1019 | "output_type": "stream", 1020 | "text": [ 1021 | "[2017-08-27 00:32:41,871] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video004000.json\n" 1022 | ] 1023 | }, 1024 | { 1025 | "name": "stdout", 1026 | "output_type": "stream", 1027 | "text": [ 1028 | "iter 56556, ep 4000, ep reward 30.0, ep steps 30\n", 1029 | "iter 59677, ep 4100, ep reward 31.0, ep steps 31\n", 1030 | "iter 62776, ep 4200, ep reward 32.0, ep steps 32\n", 1031 | "iter 65881, ep 4300, ep reward 31.0, ep steps 31\n", 1032 | "iter 68980, ep 4400, ep reward 32.0, ep steps 32\n", 1033 | "iter 72094, ep 4500, ep reward 30.0, ep steps 30\n", 1034 | "iter 75179, ep 4600, ep reward 31.0, ep steps 31\n", 1035 | "iter 78282, ep 4700, ep reward 32.0, ep steps 32\n", 1036 | "iter 81372, ep 4800, ep reward 32.0, ep steps 32\n", 1037 | "iter 84463, ep 4900, ep reward 30.0, ep steps 30\n", 1038 | "iter 87530, ep 4999" 1039 | ] 1040 | }, 1041 | { 1042 | "name": "stderr", 1043 | "output_type": "stream", 1044 | "text": [ 1045 | "[2017-08-27 00:34:07,657] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video005000.json\n" 1046 | ] 1047 | }, 1048 | { 1049 | "name": "stdout", 1050 | "output_type": "stream", 1051 | "text": [ 1052 | "\r", 1053 | "iter 87531, ep 5000\r", 1054 | "iter 87532, ep 5000\r", 1055 | "iter 87533, ep 5000\r", 1056 | "iter 87534, ep 5000\r", 1057 | "iter 87535, ep 5000\r", 1058 | "iter 87536, ep 5000\r", 1059 | "iter 87537, ep 5000\r", 1060 | "iter 87538, ep 5000\r", 1061 | "iter 87539, ep 5000\r", 1062 | "iter 87540, ep 5000\r", 1063 | "iter 87541, ep 
5000\r", 1064 | "iter 87542, ep 5000\r", 1065 | "iter 87543, ep 5000\r", 1066 | "iter 87544, ep 5000\r", 1067 | "iter 87545, ep 5000\r", 1068 | "iter 87546, ep 5000\r", 1069 | "iter 87547, ep 5000\r", 1070 | "iter 87548, ep 5000\r", 1071 | "iter 87549, ep 5000\r", 1072 | "iter 87550, ep 5000\r", 1073 | "iter 87551, ep 5000\r", 1074 | "iter 87552, ep 5000\r", 1075 | "iter 87553, ep 5000\r", 1076 | "iter 87554, ep 5000\r", 1077 | "iter 87555, ep 5000\r", 1078 | "iter 87556, ep 5000\r", 1079 | "iter 87557, ep 5000\r", 1080 | "iter 87558, ep 5000\r", 1081 | "iter 87559, ep 5000\r", 1082 | "iter 87560, ep 5000\r", 1083 | "iter 87561, ep 5000\r", 1084 | "iter 87562, ep 5000, ep reward 32.0, ep steps 32\n" 1085 | ] 1086 | }, 1087 | { 1088 | "name": "stderr", 1089 | "output_type": "stream", 1090 | "text": [ 1091 | "[2017-08-27 00:34:07,873] Finished writing results. You can upload them to the scoreboard via gym.upload('/Users/winter/Google Drive/handson-ml/tmp')\n" 1092 | ] 1093 | } 1094 | ], 1095 | "source": [ 1096 | "gamma = 0.99\n", 1097 | "max_episode = 5000\n", 1098 | "batch_size = 256\n", 1099 | "memory_warmup = batch_size*3\n", 1100 | "memory = Memory()\n", 1101 | "explore_eps = 1000\n", 1102 | "\n", 1103 | "env = gym.make('Copy-v0')\n", 1104 | "env = wrappers.Monitor(env,'./tmp/',force=True)\n", 1105 | "obs = env.reset()\n", 1106 | "init = tf.global_variables_initializer()\n", 1107 | "with tf.Session() as sess:\n", 1108 | " init.run()\n", 1109 | " iteration = 0\n", 1110 | " episode = 0\n", 1111 | " episode_reward = 0\n", 1112 | " episode_steps = 0\n", 1113 | " while episode <= max_episode:\n", 1114 | " print('\\riter {}, ep {}'.format(iteration,episode),end='')\n", 1115 | " p = episode/explore_eps\n", 1116 | " if np.random.rand() > p:\n", 1117 | " action = env.action_space.sample()\n", 1118 | " else:\n", 1119 | " Q_val = Q.eval(feed_dict={X:np.reshape(obs,[1,-1])})\n", 1120 | " action = np.argmax(Q_val,axis=1)\n", 1121 | " action = np.unravel_index(action[0],[2,2,5])\n", 1122 | " next_obs,reward,done,_ = env.step(action)\n", 1123 | " memory.append([obs,np.ravel_multi_index(action,[2,2,5]),reward,next_obs,done])\n", 1124 | " if iteration > memory_warmup:\n", 1125 | " memory_batch = memory.sample_batch(batch_size)\n", 1126 | " extract_mem = lambda k: np.array([item[k] for item in memory_batch])\n", 1127 | " obs_batch = extract_mem(0)\n", 1128 | " action_batch = extract_mem(1)\n", 1129 | " reward_batch = extract_mem(2)\n", 1130 | " next_obs_batch = extract_mem(3)\n", 1131 | " done_batch = extract_mem(4)\n", 1132 | " Qnext_val = Q.eval(feed_dict={X:np.expand_dims(next_obs_batch,axis=1)})\n", 1133 | " Qnext_val = np.max(Qnext_val,axis=1,keepdims=True)\n", 1134 | " Qexpected_batch = reward_batch + gamma*(1-done_batch)*Qnext_val.ravel()\n", 1135 | " Qexpected_batch = np.expand_dims(Qexpected_batch,1)\n", 1136 | " train_op.run(feed_dict={X:np.expand_dims(obs_batch,axis=1),action_ph:action_batch,Qexpected:Qexpected_batch})\n", 1137 | " iteration += 1\n", 1138 | " episode_reward += reward\n", 1139 | " episode_steps += 1\n", 1140 | " if done:\n", 1141 | " if iteration>memory_warmup and episode%100==0:\n", 1142 | " print(', ep reward {}, ep steps {}'.format(episode_reward, episode_steps))\n", 1143 | " episode_reward = 0\n", 1144 | " episode_steps = 0\n", 1145 | " episode += 1\n", 1146 | " obs = env.reset()\n", 1147 | " else:\n", 1148 | " obs = next_obs\n", 1149 | "env.close()" 1150 | ] 1151 | }, 1152 | { 1153 | "cell_type": "code", 1154 | "execution_count": 6, 1155 | "metadata": { 1156 | "collapsed": 
false 1157 | }, 1158 | "outputs": [ 1159 | { 1160 | "name": "stderr", 1161 | "output_type": "stream", 1162 | "text": [ 1163 | "[2017-08-27 00:34:07,916] [Copy-v0] Uploading 5001 episodes of training data\n", 1164 | "[2017-08-27 00:34:10,513] [Copy-v0] Uploading videos of 15 training episodes (3807 bytes)\n", 1165 | "[2017-08-27 00:34:10,859] [Copy-v0] Creating evaluation object from ./tmp/ with learning curve and training video\n", 1166 | "[2017-08-27 00:34:11,425] \n", 1167 | "****************************************************\n", 1168 | "You successfully uploaded your evaluation on Copy-v0 to\n", 1169 | "OpenAI Gym! You can find it at:\n", 1170 | "\n", 1171 | " https://gym.openai.com/evaluations/eval_qdzwvK4RNa1Dx71JtH2g\n", 1172 | "\n", 1173 | "****************************************************\n" 1174 | ] 1175 | } 1176 | ], 1177 | "source": [ 1178 | "gym.upload('./tmp/',api_key='sk_BlwjttPKR6ZsXVrObENYA')" 1179 | ] 1180 | } 1181 | ], 1182 | "metadata": { 1183 | "anaconda-cloud": {}, 1184 | "kernelspec": { 1185 | "display_name": "Python [conda env:tensorflow]", 1186 | "language": "python", 1187 | "name": "conda-env-tensorflow-py" 1188 | }, 1189 | "language_info": { 1190 | "codemirror_mode": { 1191 | "name": "ipython", 1192 | "version": 3 1193 | }, 1194 | "file_extension": ".py", 1195 | "mimetype": "text/x-python", 1196 | "name": "python", 1197 | "nbconvert_exporter": "python", 1198 | "pygments_lexer": "ipython3", 1199 | "version": "3.6.2" 1200 | } 1201 | }, 1202 | "nbformat": 4, 1203 | "nbformat_minor": 1 1204 | } 1205 | -------------------------------------------------------------------------------- /Continuous_Deep_Deterministic_Policy_Gradient_Net/DDPG Class ver2 (Pendulum-v0).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Deep Deterministic Policy Gradient\n", 8 | "Implementation followed: Continuous control with deep reinforcement learning (arXiv:1509.02971v5)\n", 9 | "- Memory Relay\n", 10 | "- A3C\n", 11 | "- Trained with a target net\n", 12 | "- Initial exploration policy is quite important to warm up the net" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "2017-08-26T21:22:27.814572\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "import datetime\n", 32 | "print(datetime.datetime.now().isoformat())" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import numpy as np\n", 44 | "import tensorflow as tf\n", 45 | "from functools import partial" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "class Actor(object):\n", 57 | " def __init__(self, n_observation, n_action, name='actor_net'):\n", 58 | " self.n_observation = n_observation\n", 59 | " self.n_action = n_action\n", 60 | " self.name = name\n", 61 | " self.sess = None\n", 62 | " self.build_model()\n", 63 | " self.build_train()\n", 64 | " \n", 65 | " def build_model(self):\n", 66 | " activation = tf.nn.elu\n", 67 | " kernel_initializer = tf.contrib.layers.variance_scaling_initializer()\n", 68 | " kernel_regularizer = tf.contrib.layers.l2_regularizer(0.01)\n", 69 | " default_dense = 
partial(tf.layers.dense,\\\n", 70 | " activation=activation,\\\n", 71 | " kernel_initializer=kernel_initializer,\\\n", 72 | " kernel_regularizer=kernel_regularizer)\n", 73 | " with tf.variable_scope(self.name) as scope:\n", 74 | " observation = tf.placeholder(tf.float32,shape=[None,self.n_observation])\n", 75 | " hid1 = default_dense(observation,32)\n", 76 | " hid2 = default_dense(hid1,64)\n", 77 | " action = default_dense(hid2,self.n_action,activation=tf.nn.tanh,use_bias=False)\n", 78 | " trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=self.name)\n", 79 | " self.observation,self.action,self.trainable_vars = observation,action,trainable_vars\n", 80 | " \n", 81 | " def build_train(self,learning_rate = 0.0001):\n", 82 | " with tf.variable_scope(self.name) as scope:\n", 83 | " action_grads = tf.placeholder(tf.float32,[None,self.n_action])\n", 84 | " var_grads = tf.gradients(self.action,self.trainable_vars,-action_grads)\n", 85 | " train_op = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(var_grads,self.trainable_vars))\n", 86 | " self.action_grads,self.train_op = action_grads,train_op\n", 87 | " \n", 88 | " def predict_action(self,obs_batch):\n", 89 | " return self.action.eval(session=self.sess,feed_dict={self.observation:obs_batch})\n", 90 | "\n", 91 | " def train(self,obs_batch,action_grads):\n", 92 | " batch_size = len(action_grads)\n", 93 | " self.train_op.run(session=self.sess,feed_dict={self.observation:obs_batch,self.action_grads:action_grads/batch_size})\n", 94 | " \n", 95 | " def set_session(self,sess):\n", 96 | " self.sess = sess\n", 97 | " \n", 98 | " def get_trainable_dict(self):\n", 99 | " return {var.name[len(self.name):]: var for var in self.trainable_vars}" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 4, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "class Critic(object):\n", 111 | " def __init__(self, n_observation, n_action, name='critic_net'):\n", 112 | " self.n_observation = n_observation\n", 113 | " self.n_action = n_action\n", 114 | " self.name = name\n", 115 | " self.sess = None\n", 116 | " self.build_model()\n", 117 | " self.build_train()\n", 118 | " \n", 119 | " def build_model(self):\n", 120 | " activation = tf.nn.elu\n", 121 | " kernel_initializer = tf.contrib.layers.variance_scaling_initializer()\n", 122 | " kernel_regularizer = tf.contrib.layers.l2_regularizer(0.01)\n", 123 | " default_dense = partial(tf.layers.dense,\\\n", 124 | " activation=activation,\\\n", 125 | " kernel_initializer=kernel_initializer,\\\n", 126 | " kernel_regularizer=kernel_regularizer)\n", 127 | " with tf.variable_scope(self.name) as scope:\n", 128 | " observation = tf.placeholder(tf.float32,shape=[None,self.n_observation])\n", 129 | " action = tf.placeholder(tf.float32,shape=[None,self.n_action])\n", 130 | " hid1 = default_dense(observation,32)\n", 131 | " hid2 = default_dense(action,32)\n", 132 | " hid3 = tf.concat([hid1,hid2],axis=1)\n", 133 | " hid4 = default_dense(hid3,128)\n", 134 | " Q = default_dense(hid4,1, activation=None)\n", 135 | " trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=self.name)\n", 136 | " self.observation,self.action,self.Q,self.trainable_vars= observation,action,Q,trainable_vars\n", 137 | " \n", 138 | " def build_train(self,learning_rate=0.001):\n", 139 | " with tf.variable_scope(self.name) as scope:\n", 140 | " Qexpected = tf.placeholder(tf.float32,shape=[None,1])\n", 141 | " loss = 
tf.losses.mean_squared_error(Qexpected,self.Q)\n", 142 | " optimizer = tf.train.AdamOptimizer(learning_rate)\n", 143 | " train_op = optimizer.minimize(loss)\n", 144 | " self.Qexpected,self.train_op = Qexpected,train_op\n", 145 | " self.action_grads = tf.gradients(self.Q,self.action)[0]\n", 146 | " \n", 147 | " def predict_Q(self,obs_batch,action_batch):\n", 148 | " return self.Q.eval(session=self.sess,\\\n", 149 | " feed_dict={self.observation:obs_batch,self.action:action_batch})\n", 150 | " \n", 151 | " def compute_action_grads(self,obs_batch,action_batch):\n", 152 | " return self.action_grads.eval(session=self.sess,\\\n", 153 | " feed_dict={self.observation:obs_batch,self.action:action_batch})\n", 154 | " def train(self,obs_batch,action_batch,Qexpected_batch):\n", 155 | " self.train_op.run(session=self.sess,\\\n", 156 | " feed_dict={self.observation:obs_batch,self.action:action_batch,self.Qexpected:Qexpected_batch})\n", 157 | " \n", 158 | " def set_session(self,sess):\n", 159 | " self.sess = sess\n", 160 | " \n", 161 | " def get_trainable_dict(self):\n", 162 | " return {var.name[len(self.name):]: var for var in self.trainable_vars}" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 5, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "class AsyncNets(object):\n", 174 | " def __init__(self,class_name):\n", 175 | " class_ = eval(class_name)\n", 176 | " self.net = class_(3,1,name=class_name)\n", 177 | " self.target_net = class_(3,1,name='{}_target'.format(class_name))\n", 178 | " self.TAU = tf.placeholder(tf.float32,shape=None)\n", 179 | " self.sess = None\n", 180 | " self.__build_async_assign()\n", 181 | " \n", 182 | " def __build_async_assign(self):\n", 183 | " net_dict = self.net.get_trainable_dict()\n", 184 | " target_net_dict = self.target_net.get_trainable_dict()\n", 185 | " keys = net_dict.keys()\n", 186 | " async_update_op = [target_net_dict[key].assign((1-self.TAU)*target_net_dict[key]+self.TAU*net_dict[key]) \\\n", 187 | " for key in keys]\n", 188 | " self.async_update_op = async_update_op\n", 189 | " \n", 190 | " def async_update(self,tau=0.01):\n", 191 | " self.sess.run(self.async_update_op,feed_dict={self.TAU:tau})\n", 192 | " \n", 193 | " def set_session(self,sess):\n", 194 | " self.sess = sess\n", 195 | " self.net.set_session(sess)\n", 196 | " self.target_net.set_session(sess)\n", 197 | " \n", 198 | " def get_subnets(self):\n", 199 | " return self.net, self.target_net\n", 200 | " \n", 201 | " " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 6, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "from collections import deque\n", 213 | "class Memory(object):\n", 214 | " def __init__(self,memory_size=10000):\n", 215 | " self.memory = deque(maxlen=memory_size)\n", 216 | " self.memory_size = memory_size\n", 217 | " \n", 218 | " def __len__(self):\n", 219 | " return len(self.memory)\n", 220 | " \n", 221 | " def append(self,item):\n", 222 | " self.memory.append(item)\n", 223 | " \n", 224 | " def sample_batch(self,batch_size=256):\n", 225 | " idx = np.random.permutation(len(self.memory))[:batch_size]\n", 226 | " return [self.memory[i] for i in idx]" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 7, 232 | "metadata": { 233 | "collapsed": true, 234 | "scrolled": false 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "def UONoise():\n", 239 | " theta = 0.15\n", 240 | " sigma = 0.2\n", 241 | " state = 
0\n", 242 | " while True:\n", 243 | " yield state\n", 244 | " state += -theta*state+sigma*np.random.randn()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 8, 250 | "metadata": { 251 | "collapsed": false, 252 | "scrolled": false 253 | }, 254 | "outputs": [ 255 | { 256 | "name": "stderr", 257 | "output_type": "stream", 258 | "text": [ 259 | "[2017-08-26 21:22:36,925] Making new env: Pendulum-v0\n", 260 | "[2017-08-26 21:22:36,973] Clearing 18 monitor files from previous run (because force=True was provided)\n", 261 | "[2017-08-26 21:22:36,975] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1408.video000000.mp4\n" 262 | ] 263 | }, 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "iter 193, ep 0" 269 | ] 270 | }, 271 | { 272 | "name": "stderr", 273 | "output_type": "stream", 274 | "text": [ 275 | "[2017-08-26 21:22:41,880] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1408.video000001.mp4\n" 276 | ] 277 | }, 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | "iter 199, ep 0, score -1064.337803, steps 200\n", 283 | "iter 399, ep 1, score -1452.929239, steps 200\n", 284 | "iter 599, ep 2, score -900.730704, steps 200\n", 285 | "iter 799, ep 3, score -979.678247, steps 200\n", 286 | "iter 999, ep 4, score -1464.379648, steps 200\n", 287 | "iter 1199, ep 5, score -964.648093, steps 200\n", 288 | "iter 1399, ep 6, score -1625.818235, steps 200\n", 289 | "iter 1591, ep 7" 290 | ] 291 | }, 292 | { 293 | "name": "stderr", 294 | "output_type": "stream", 295 | "text": [ 296 | "[2017-08-26 21:22:51,209] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1408.video000008.mp4\n" 297 | ] 298 | }, 299 | { 300 | "name": "stdout", 301 | "output_type": "stream", 302 | "text": [ 303 | "iter 1599, ep 7, score -1263.597151, steps 200\n", 304 | "iter 1799, ep 8, score -1451.116410, steps 200\n", 305 | "iter 1999, ep 9, score -992.277210, steps 200\n", 306 | "iter 2199, ep 10, score -1620.274533, steps 200\n", 307 | "iter 2399, ep 11, score -1731.880773, steps 200\n", 308 | "iter 2599, ep 12, score -1094.504865, steps 200\n", 309 | "iter 2799, ep 13, score -1741.083284, steps 200\n", 310 | "iter 2999, ep 14, score -1442.242095, steps 200\n", 311 | "iter 3199, ep 15, score -1098.027994, steps 200\n", 312 | "iter 3399, ep 16, score -1098.776404, steps 200\n", 313 | "iter 3599, ep 17, score -1798.072430, steps 200\n", 314 | "iter 3799, ep 18, score -1092.271777, steps 200\n", 315 | "iter 3999, ep 19, score -1021.271582, steps 200\n", 316 | "iter 4199, ep 20, score -1223.411688, steps 200\n", 317 | "iter 4399, ep 21, score -1309.548927, steps 200\n", 318 | "iter 4599, ep 22, score -1027.574288, steps 200\n", 319 | "iter 4799, ep 23, score -1188.868957, steps 200\n", 320 | "iter 4999, ep 24, score -1140.436419, steps 200\n", 321 | "iter 5199, ep 25, score -1235.387716, steps 200\n", 322 | "iter 5391, ep 26" 323 | ] 324 | }, 325 | { 326 | "name": "stderr", 327 | "output_type": "stream", 328 | "text": [ 329 | "[2017-08-26 21:23:18,553] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1408.video000027.mp4\n" 330 | ] 331 | }, 332 | { 333 | "name": "stdout", 334 | "output_type": "stream", 335 | "text": [ 336 | "iter 5399, ep 26, score -987.548204, steps 200\n", 337 | "iter 5599, ep 27, score -1619.530272, steps 200\n", 338 | 
"iter 5799, ep 28, score -968.738765, steps 200\n", 339 | "iter 5999, ep 29, score -988.404297, steps 200\n", 340 | "iter 6199, ep 30, score -858.700286, steps 200\n", 341 | "iter 6399, ep 31, score -1725.719085, steps 200\n", 342 | "iter 6599, ep 32, score -1411.769340, steps 200\n", 343 | "iter 6799, ep 33, score -1499.526679, steps 200\n", 344 | "iter 6999, ep 34, score -1111.330258, steps 200\n", 345 | "iter 7199, ep 35, score -1268.784305, steps 200\n", 346 | "iter 7399, ep 36, score -1227.741326, steps 200\n", 347 | "iter 7599, ep 37, score -1479.133063, steps 200\n", 348 | "iter 7799, ep 38, score -1403.646349, steps 200\n", 349 | "iter 7999, ep 39, score -1484.729898, steps 200\n", 350 | "iter 8199, ep 40, score -1298.216238, steps 200\n", 351 | "iter 8399, ep 41, score -1098.002777, steps 200\n", 352 | "iter 8599, ep 42, score -1182.987359, steps 200\n", 353 | "iter 8799, ep 43, score -1471.492544, steps 200\n", 354 | "iter 8999, ep 44, score -1211.134201, steps 200\n", 355 | "iter 9199, ep 45, score -1132.366484, steps 200\n", 356 | "iter 9399, ep 46, score -1160.883524, steps 200\n", 357 | "iter 9599, ep 47, score -992.302824, steps 200\n", 358 | "iter 9799, ep 48, score -980.710118, steps 200\n", 359 | "iter 9999, ep 49, score -1008.958237, steps 200\n", 360 | "iter 10199, ep 50, score -807.954371, steps 200\n", 361 | "iter 10399, ep 51, score -1101.055201, steps 200\n", 362 | "iter 10599, ep 52, score -1165.670182, steps 200\n", 363 | "iter 10799, ep 53, score -1144.186126, steps 200\n", 364 | "iter 10999, ep 54, score -977.523134, steps 200\n", 365 | "iter 11199, ep 55, score -753.598748, steps 200\n", 366 | "iter 11399, ep 56, score -1039.993851, steps 200\n", 367 | "iter 11599, ep 57, score -638.446753, steps 200\n", 368 | "iter 11799, ep 58, score -753.432776, steps 200\n", 369 | "iter 11999, ep 59, score -377.969747, steps 200\n", 370 | "iter 12199, ep 60, score -255.884073, steps 200\n", 371 | "iter 12399, ep 61, score -251.304605, steps 200\n", 372 | "iter 12599, ep 62, score -480.490893, steps 200\n", 373 | "iter 12788, ep 63" 374 | ] 375 | }, 376 | { 377 | "name": "stderr", 378 | "output_type": "stream", 379 | "text": [ 380 | "[2017-08-26 21:24:08,642] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1408.video000064.mp4\n" 381 | ] 382 | }, 383 | { 384 | "name": "stdout", 385 | "output_type": "stream", 386 | "text": [ 387 | "iter 12799, ep 63, score -514.583627, steps 200\n", 388 | "iter 12999, ep 64, score -492.792294, steps 200\n", 389 | "iter 13199, ep 65, score -0.809584, steps 200\n", 390 | "iter 13399, ep 66, score -128.301171, steps 200\n", 391 | "iter 13599, ep 67, score -129.260956, steps 200\n", 392 | "iter 13799, ep 68, score -377.287180, steps 200\n", 393 | "iter 13999, ep 69, score -368.860775, steps 200\n", 394 | "iter 14199, ep 70, score -123.629120, steps 200\n", 395 | "iter 14399, ep 71, score -384.817258, steps 200\n", 396 | "iter 14599, ep 72, score -0.677299, steps 200\n", 397 | "iter 14799, ep 73, score -370.016429, steps 200\n", 398 | "iter 14999, ep 74, score -259.441145, steps 200\n", 399 | "iter 15199, ep 75, score -237.910846, steps 200\n", 400 | "iter 15399, ep 76, score -119.175965, steps 200\n", 401 | "iter 15599, ep 77, score -516.978731, steps 200\n", 402 | "iter 15799, ep 78, score -1344.161227, steps 200\n", 403 | "iter 15999, ep 79, score -0.807787, steps 200\n", 404 | "iter 16199, ep 80, score -114.978708, steps 200\n", 405 | "iter 16399, ep 81, score -768.592530, steps 200\n", 406 | 
"iter 16599, ep 82, score -128.405298, steps 200\n", 407 | "iter 16799, ep 83, score -0.127020, steps 200\n", 408 | "iter 16999, ep 84, score -349.809474, steps 200\n", 409 | "iter 17199, ep 85, score -964.106506, steps 200\n", 410 | "iter 17399, ep 86, score -0.359138, steps 200\n", 411 | "iter 17599, ep 87, score -1188.854105, steps 200\n", 412 | "iter 17799, ep 88, score -1182.499609, steps 200\n", 413 | "iter 17999, ep 89, score -119.868426, steps 200\n", 414 | "iter 18199, ep 90, score -374.429581, steps 200\n", 415 | "iter 18399, ep 91, score -243.802135, steps 200\n", 416 | "iter 18599, ep 92, score -240.095410, steps 200\n", 417 | "iter 18799, ep 93, score -118.592989, steps 200\n", 418 | "iter 18999, ep 94, score -122.878965, steps 200\n", 419 | "iter 19199, ep 95, score -123.732260, steps 200\n", 420 | "iter 19399, ep 96, score -122.181211, steps 200\n", 421 | "iter 19599, ep 97, score -252.719152, steps 200\n", 422 | "iter 19799, ep 98, score -128.893474, steps 200\n", 423 | "iter 19999, ep 99, score -255.153885, steps 200\n", 424 | "iter 20199, ep 100, score -244.093991, steps 200\n", 425 | "iter 20399, ep 101, score -245.012112, steps 200\n", 426 | "iter 20599, ep 102, score -125.532636, steps 200\n", 427 | "iter 20799, ep 103, score -281.169863, steps 200\n", 428 | "iter 20999, ep 104, score -358.179986, steps 200\n", 429 | "iter 21199, ep 105, score -124.554989, steps 200\n", 430 | "iter 21399, ep 106, score -263.171641, steps 200\n", 431 | "iter 21599, ep 107, score -243.242076, steps 200\n", 432 | "iter 21799, ep 108, score -1.345710, steps 200\n", 433 | "iter 21999, ep 109, score -132.690177, steps 200\n", 434 | "iter 22199, ep 110, score -124.921505, steps 200\n", 435 | "iter 22399, ep 111, score -126.223287, steps 200\n", 436 | "iter 22599, ep 112, score -123.857903, steps 200\n", 437 | "iter 22799, ep 113, score -129.218141, steps 200\n", 438 | "iter 22999, ep 114, score -126.714810, steps 200\n", 439 | "iter 23199, ep 115, score -3.717351, steps 200\n", 440 | "iter 23399, ep 116, score -242.727221, steps 200\n", 441 | "iter 23599, ep 117, score -252.102512, steps 200\n", 442 | "iter 23799, ep 118, score -131.549840, steps 200\n", 443 | "iter 23999, ep 119, score -131.233809, steps 200\n", 444 | "iter 24199, ep 120, score -131.400693, steps 200\n", 445 | "iter 24399, ep 121, score -261.426106, steps 200\n", 446 | "iter 24599, ep 122, score -253.119367, steps 200\n", 447 | "iter 24799, ep 123, score -259.357212, steps 200\n", 448 | "iter 24984, ep 124" 449 | ] 450 | }, 451 | { 452 | "name": "stderr", 453 | "output_type": "stream", 454 | "text": [ 455 | "[2017-08-26 21:25:33,603] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1408.video000125.mp4\n" 456 | ] 457 | }, 458 | { 459 | "name": "stdout", 460 | "output_type": "stream", 461 | "text": [ 462 | "iter 24999, ep 124, score -260.784997, steps 200\n", 463 | "iter 25199, ep 125, score -125.729126, steps 200\n", 464 | "iter 25399, ep 126, score -260.855328, steps 200\n", 465 | "iter 25599, ep 127, score -385.974153, steps 200\n", 466 | "iter 25799, ep 128, score -127.646294, steps 200\n", 467 | "iter 25999, ep 129, score -250.428526, steps 200\n", 468 | "iter 26199, ep 130, score -270.953140, steps 200\n", 469 | "iter 26399, ep 131, score -389.787544, steps 200\n", 470 | "iter 26599, ep 132, score -250.267412, steps 200\n", 471 | "iter 26799, ep 133, score -257.369616, steps 200\n", 472 | "iter 26999, ep 134, score -375.464715, steps 200\n", 473 | "iter 27199, ep 135, 
score -335.487404, steps 200\n", 474 | "iter 27399, ep 136, score -262.835743, steps 200\n", 475 | "iter 27599, ep 137, score -492.912502, steps 200\n", 476 | "iter 27799, ep 138, score -259.321548, steps 200\n", 477 | "iter 27999, ep 139, score -364.541715, steps 200\n", 478 | "iter 28199, ep 140, score -129.515003, steps 200\n", 479 | "iter 28399, ep 141, score -240.775872, steps 200\n", 480 | "iter 28599, ep 142, score -243.228757, steps 200\n", 481 | "iter 28799, ep 143, score -245.208978, steps 200\n", 482 | "iter 28999, ep 144, score -355.964859, steps 200\n", 483 | "iter 29199, ep 145, score -129.415597, steps 200\n", 484 | "iter 29399, ep 146, score -250.975489, steps 200\n", 485 | "iter 29599, ep 147, score -252.039610, steps 200\n", 486 | "iter 29799, ep 148, score -244.715129, steps 200\n", 487 | "iter 29999, ep 149, score -130.504347, steps 200\n", 488 | "iter 30199, ep 150, score -354.974179, steps 200\n", 489 | "iter 30399, ep 151, score -117.845393, steps 200\n", 490 | "iter 30599, ep 152, score -260.611639, steps 200\n", 491 | "iter 30799, ep 153, score -4.214091, steps 200\n", 492 | "iter 30999, ep 154, score -4.594981, steps 200\n", 493 | "iter 31199, ep 155, score -239.442948, steps 200\n", 494 | "iter 31399, ep 156, score -238.317667, steps 200\n", 495 | "iter 31599, ep 157, score -1059.610431, steps 200\n", 496 | "iter 31799, ep 158, score -242.727606, steps 200\n", 497 | "iter 31999, ep 159, score -259.869131, steps 200\n", 498 | "iter 32199, ep 160, score -128.741351, steps 200\n", 499 | "iter 32399, ep 161, score -3.639193, steps 200\n", 500 | "iter 32599, ep 162, score -236.103353, steps 200\n", 501 | "iter 32799, ep 163, score -130.198551, steps 200\n", 502 | "iter 32999, ep 164, score -238.857995, steps 200\n", 503 | "iter 33199, ep 165, score -117.932390, steps 200\n", 504 | "iter 33399, ep 166, score -377.000371, steps 200\n", 505 | "iter 33599, ep 167, score -124.129138, steps 200\n", 506 | "iter 33799, ep 168, score -121.294558, steps 200\n", 507 | "iter 33999, ep 169, score -1186.945392, steps 200\n", 508 | "iter 34199, ep 170, score -1233.404024, steps 200\n", 509 | "iter 34399, ep 171, score -130.510697, steps 200\n", 510 | "iter 34599, ep 172, score -4.982641, steps 200\n", 511 | "iter 34799, ep 173, score -131.216311, steps 200\n", 512 | "iter 34999, ep 174, score -378.319176, steps 200\n", 513 | "iter 35199, ep 175, score -298.653596, steps 200\n", 514 | "iter 35399, ep 176, score -251.740129, steps 200\n", 515 | "iter 35599, ep 177, score -277.894682, steps 200\n", 516 | "iter 35799, ep 178, score -135.317220, steps 200\n", 517 | "iter 35999, ep 179, score -127.316735, steps 200\n", 518 | "iter 36199, ep 180, score -132.822921, steps 200\n", 519 | "iter 36399, ep 181, score -130.992163, steps 200\n", 520 | "iter 36599, ep 182, score -266.365690, steps 200\n", 521 | "iter 36799, ep 183, score -7.579537, steps 200\n", 522 | "iter 36999, ep 184, score -239.702124, steps 200\n", 523 | "iter 37199, ep 185, score -119.325143, steps 200\n", 524 | "iter 37399, ep 186, score -120.196804, steps 200\n", 525 | "iter 37599, ep 187, score -125.847208, steps 200\n", 526 | "iter 37799, ep 188, score -131.916780, steps 200\n", 527 | "iter 37999, ep 189, score -239.109891, steps 200\n", 528 | "iter 38199, ep 190, score -5.310116, steps 200\n", 529 | "iter 38399, ep 191, score -350.563647, steps 200\n", 530 | "iter 38599, ep 192, score -352.134777, steps 200\n", 531 | "iter 38799, ep 193, score -126.908148, steps 200\n", 532 | "iter 38999, ep 194, score -129.344405, 
steps 200\n", 533 | "iter 39199, ep 195, score -129.438627, steps 200\n", 534 | "iter 39399, ep 196, score -127.880176, steps 200\n", 535 | "iter 39599, ep 197, score -246.035287, steps 200\n", 536 | "iter 39799, ep 198, score -234.146873, steps 200\n", 537 | "iter 39999, ep 199, score -122.887167, steps 200\n", 538 | "iter 40199, ep 200, score -4.577153, steps 200\n", 539 | "iter 40399, ep 201, score -131.089583, steps 200\n", 540 | "iter 40599, ep 202, score -344.935548, steps 200\n", 541 | "iter 40799, ep 203, score -117.195885, steps 200\n", 542 | "iter 40999, ep 204, score -303.608026, steps 200\n", 543 | "iter 41199, ep 205, score -124.968612, steps 200\n", 544 | "iter 41399, ep 206, score -126.725937, steps 200\n", 545 | "iter 41599, ep 207, score -114.673181, steps 200\n", 546 | "iter 41799, ep 208, score -249.000591, steps 200\n", 547 | "iter 41999, ep 209, score -1.623344, steps 200\n", 548 | "iter 42199, ep 210, score -1.010534, steps 200\n", 549 | "iter 42399, ep 211, score -123.786262, steps 200\n", 550 | "iter 42599, ep 212, score -125.591439, steps 200\n", 551 | "iter 42799, ep 213, score -2.525032, steps 200\n", 552 | "iter 42999, ep 214, score -265.754485, steps 200\n", 553 | "iter 43172, ep 215" 554 | ] 555 | }, 556 | { 557 | "name": "stderr", 558 | "output_type": "stream", 559 | "text": [ 560 | "[2017-08-26 21:27:37,112] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1408.video000216.mp4\n" 561 | ] 562 | }, 563 | { 564 | "name": "stdout", 565 | "output_type": "stream", 566 | "text": [ 567 | "iter 43199, ep 215, score -125.225655, steps 200\n", 568 | "iter 43399, ep 216, score -3.873638, steps 200\n", 569 | "iter 43599, ep 217, score -122.130962, steps 200\n", 570 | "iter 43799, ep 218, score -124.214464, steps 200\n", 571 | "iter 43999, ep 219, score -127.464848, steps 200\n", 572 | "iter 44199, ep 220, score -360.385836, steps 200\n", 573 | "iter 44399, ep 221, score -240.291437, steps 200\n", 574 | "iter 44599, ep 222, score -240.390900, steps 200\n", 575 | "iter 44799, ep 223, score -124.180430, steps 200\n", 576 | "iter 44999, ep 224, score -116.245345, steps 200\n", 577 | "iter 45199, ep 225, score -117.838239, steps 200\n", 578 | "iter 45399, ep 226, score -124.368810, steps 200\n", 579 | "iter 45599, ep 227, score -125.467418, steps 200\n", 580 | "iter 45799, ep 228, score -120.172418, steps 200\n", 581 | "iter 45999, ep 229, score -118.514655, steps 200\n", 582 | "iter 46199, ep 230, score -5.093049, steps 200\n", 583 | "iter 46399, ep 231, score -135.675310, steps 200\n", 584 | "iter 46599, ep 232, score -258.219327, steps 200\n", 585 | "iter 46799, ep 233, score -233.724384, steps 200\n", 586 | "iter 46999, ep 234, score -5.685182, steps 200\n", 587 | "iter 47199, ep 235, score -4.681136, steps 200\n", 588 | "iter 47399, ep 236, score -254.856026, steps 200\n", 589 | "iter 47599, ep 237, score -121.647198, steps 200\n", 590 | "iter 47799, ep 238, score -130.115954, steps 200\n", 591 | "iter 47999, ep 239, score -5.506703, steps 200\n", 592 | "iter 48199, ep 240, score -335.422943, steps 200\n", 593 | "iter 48399, ep 241, score -370.172429, steps 200\n", 594 | "iter 48599, ep 242, score -237.071299, steps 200\n", 595 | "iter 48799, ep 243, score -334.830867, steps 200\n", 596 | "iter 48999, ep 244, score -265.557115, steps 200\n", 597 | "iter 49199, ep 245, score -240.852207, steps 200\n", 598 | "iter 49399, ep 246, score -6.466158, steps 200\n", 599 | "iter 49599, ep 247, score -132.558734, steps 200\n", 600 | 
"iter 49799, ep 248, score -330.944252, steps 200\n", 601 | "iter 49999, ep 249, score -248.648816, steps 200\n", 602 | "iter 50199, ep 250, score -252.133982, steps 200\n", 603 | "iter 50399, ep 251, score -123.479560, steps 200\n", 604 | "iter 50599, ep 252, score -132.727016, steps 200\n", 605 | "iter 50799, ep 253, score -242.176115, steps 200\n", 606 | "iter 50999, ep 254, score -132.175714, steps 200\n", 607 | "iter 51199, ep 255, score -270.003987, steps 200\n", 608 | "iter 51399, ep 256, score -128.177940, steps 200\n", 609 | "iter 51599, ep 257, score -133.668196, steps 200\n", 610 | "iter 51799, ep 258, score -122.770314, steps 200\n", 611 | "iter 51999, ep 259, score -133.011755, steps 200\n", 612 | "iter 52199, ep 260, score -119.984557, steps 200\n", 613 | "iter 52399, ep 261, score -336.873420, steps 200\n", 614 | "iter 52599, ep 262, score -239.043861, steps 200\n", 615 | "iter 52799, ep 263, score -239.302411, steps 200\n", 616 | "iter 52999, ep 264, score -123.102939, steps 200\n", 617 | "iter 53199, ep 265, score -130.724306, steps 200\n", 618 | "iter 53399, ep 266, score -133.556476, steps 200\n", 619 | "iter 53599, ep 267, score -271.536447, steps 200\n", 620 | "iter 53799, ep 268, score -123.018146, steps 200\n", 621 | "iter 53999, ep 269, score -6.970198, steps 200\n", 622 | "iter 54199, ep 270, score -130.543401, steps 200\n", 623 | "iter 54399, ep 271, score -7.032991, steps 200\n", 624 | "iter 54599, ep 272, score -129.102556, steps 200\n", 625 | "iter 54799, ep 273, score -129.623063, steps 200\n", 626 | "iter 54999, ep 274, score -130.037734, steps 200\n", 627 | "iter 55199, ep 275, score -354.135605, steps 200\n", 628 | "iter 55399, ep 276, score -123.412699, steps 200\n", 629 | "iter 55599, ep 277, score -8.412939, steps 200\n", 630 | "iter 55799, ep 278, score -120.640051, steps 200\n", 631 | "iter 55999, ep 279, score -133.080594, steps 200\n", 632 | "iter 56199, ep 280, score -132.837005, steps 200\n", 633 | "iter 56399, ep 281, score -121.366445, steps 200\n", 634 | "iter 56599, ep 282, score -7.570586, steps 200\n", 635 | "iter 56799, ep 283, score -132.751188, steps 200\n", 636 | "iter 56999, ep 284, score -134.402747, steps 200\n", 637 | "iter 57199, ep 285, score -128.237481, steps 200\n", 638 | "iter 57399, ep 286, score -132.577115, steps 200\n", 639 | "iter 57599, ep 287, score -7.601648, steps 200\n", 640 | "iter 57799, ep 288, score -7.176242, steps 200\n", 641 | "iter 57999, ep 289, score -311.986878, steps 200\n", 642 | "iter 58199, ep 290, score -125.075807, steps 200\n", 643 | "iter 58399, ep 291, score -132.869319, steps 200\n", 644 | "iter 58599, ep 292, score -246.885029, steps 200\n", 645 | "iter 58799, ep 293, score -136.779909, steps 200\n", 646 | "iter 58999, ep 294, score -118.865635, steps 200\n", 647 | "iter 59199, ep 295, score -125.123747, steps 200\n", 648 | "iter 59399, ep 296, score -266.036222, steps 200\n", 649 | "iter 59599, ep 297, score -280.558942, steps 200\n", 650 | "iter 59799, ep 298, score -6.365290, steps 200\n", 651 | "iter 59999, ep 299, score -333.505314, steps 200\n", 652 | "iter 60199, ep 300, score -257.021949, steps 200\n", 653 | "iter 60399, ep 301, score -3.614302, steps 200\n", 654 | "iter 60599, ep 302, score -122.959251, steps 200\n", 655 | "iter 60799, ep 303, score -232.235837, steps 200\n", 656 | "iter 60999, ep 304, score -237.746261, steps 200\n", 657 | "iter 61199, ep 305, score -121.236568, steps 200\n", 658 | "iter 61399, ep 306, score -243.285471, steps 200\n", 659 | "iter 61599, ep 307, score 
-123.550587, steps 200\n", 660 | "iter 61799, ep 308, score -128.397281, steps 200\n", 661 | "iter 61999, ep 309, score -128.373443, steps 200\n", 662 | "iter 62199, ep 310, score -128.597447, steps 200\n", 663 | "iter 62399, ep 311, score -116.914463, steps 200\n", 664 | "iter 62599, ep 312, score -240.864486, steps 200\n", 665 | "iter 62799, ep 313, score -122.356025, steps 200\n", 666 | "iter 62999, ep 314, score -120.251762, steps 200\n", 667 | "iter 63199, ep 315, score -239.192689, steps 200\n", 668 | "iter 63399, ep 316, score -126.717909, steps 200\n", 669 | "iter 63599, ep 317, score -245.941814, steps 200\n", 670 | "iter 63799, ep 318, score -127.907692, steps 200\n", 671 | "iter 63999, ep 319, score -243.145524, steps 200\n", 672 | "iter 64199, ep 320, score -116.222546, steps 200\n", 673 | "iter 64399, ep 321, score -5.437299, steps 200\n", 674 | "iter 64599, ep 322, score -260.395145, steps 200\n", 675 | "iter 64799, ep 323, score -244.930093, steps 200\n", 676 | "iter 64999, ep 324, score -242.066035, steps 200\n", 677 | "iter 65199, ep 325, score -247.987432, steps 200\n", 678 | "iter 65399, ep 326, score -276.596887, steps 200\n", 679 | "iter 65599, ep 327, score -242.305310, steps 200\n", 680 | "iter 65799, ep 328, score -129.111124, steps 200\n", 681 | "iter 65999, ep 329, score -126.496342, steps 200\n", 682 | "iter 66199, ep 330, score -127.199991, steps 200\n", 683 | "iter 66399, ep 331, score -4.191070, steps 200\n", 684 | "iter 66599, ep 332, score -125.634870, steps 200\n", 685 | "iter 66799, ep 333, score -256.336007, steps 200\n", 686 | "iter 66999, ep 334, score -235.038112, steps 200\n", 687 | "iter 67199, ep 335, score -244.274242, steps 200\n", 688 | "iter 67399, ep 336, score -4.488644, steps 200\n", 689 | "iter 67599, ep 337, score -4.704433, steps 200\n", 690 | "iter 67799, ep 338, score -314.068747, steps 200\n", 691 | "iter 67999, ep 339, score -128.962556, steps 200\n", 692 | "iter 68199, ep 340, score -352.277938, steps 200\n", 693 | "iter 68399, ep 341, score -119.929162, steps 200\n", 694 | "iter 68595, ep 342" 695 | ] 696 | }, 697 | { 698 | "name": "stderr", 699 | "output_type": "stream", 700 | "text": [ 701 | "[2017-08-26 21:30:27,232] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1408.video000343.mp4\n" 702 | ] 703 | }, 704 | { 705 | "name": "stdout", 706 | "output_type": "stream", 707 | "text": [ 708 | "iter 68599, ep 342, score -366.539098, steps 200\n", 709 | "iter 68799, ep 343, score -119.283715, steps 200\n", 710 | "iter 68999, ep 344, score -264.026636, steps 200\n", 711 | "iter 69199, ep 345, score -127.457260, steps 200\n", 712 | "iter 69399, ep 346, score -126.855899, steps 200\n", 713 | "iter 69599, ep 347, score -116.423614, steps 200\n", 714 | "iter 69799, ep 348, score -365.954037, steps 200\n", 715 | "iter 69999, ep 349, score -267.084830, steps 200\n", 716 | "iter 70199, ep 350, score -125.671618, steps 200\n", 717 | "iter 70399, ep 351, score -368.480243, steps 200\n", 718 | "iter 70599, ep 352, score -235.926462, steps 200\n", 719 | "iter 70799, ep 353, score -125.897110, steps 200\n", 720 | "iter 70999, ep 354, score -298.170883, steps 200\n", 721 | "iter 71199, ep 355, score -336.450492, steps 200\n", 722 | "iter 71399, ep 356, score -358.646415, steps 200\n", 723 | "iter 71599, ep 357, score -129.539452, steps 200\n", 724 | "iter 71799, ep 358, score -126.687282, steps 200\n", 725 | "iter 71999, ep 359, score -128.661384, steps 200\n", 726 | "iter 72199, ep 360, score 
-124.836461, steps 200\n", 727 | "iter 72399, ep 361, score -239.609822, steps 200\n", 728 | "iter 72599, ep 362, score -128.085954, steps 200\n", 729 | "iter 72799, ep 363, score -250.857986, steps 200\n", 730 | "iter 72999, ep 364, score -367.498097, steps 200\n", 731 | "iter 73199, ep 365, score -118.573546, steps 200\n", 732 | "iter 73399, ep 366, score -357.616653, steps 200\n", 733 | "iter 73599, ep 367, score -238.981204, steps 200\n", 734 | "iter 73799, ep 368, score -4.328976, steps 200\n", 735 | "iter 73999, ep 369, score -242.917910, steps 200\n", 736 | "iter 74199, ep 370, score -372.671354, steps 200\n", 737 | "iter 74399, ep 371, score -245.484786, steps 200\n", 738 | "iter 74599, ep 372, score -127.712747, steps 200\n", 739 | "iter 74799, ep 373, score -246.383811, steps 200\n", 740 | "iter 74999, ep 374, score -4.345438, steps 200\n", 741 | "iter 75199, ep 375, score -266.373024, steps 200\n", 742 | "iter 75399, ep 376, score -122.290439, steps 200\n", 743 | "iter 75599, ep 377, score -270.718117, steps 200\n", 744 | "iter 75799, ep 378, score -4.465336, steps 200\n", 745 | "iter 75999, ep 379, score -236.911200, steps 200\n", 746 | "iter 76199, ep 380, score -356.773776, steps 200\n", 747 | "iter 76399, ep 381, score -130.141630, steps 200\n", 748 | "iter 76599, ep 382, score -233.822707, steps 200\n", 749 | "iter 76799, ep 383, score -248.834029, steps 200\n", 750 | "iter 76999, ep 384, score -254.197619, steps 200\n", 751 | "iter 77199, ep 385, score -130.698491, steps 200\n", 752 | "iter 77399, ep 386, score -130.011718, steps 200\n", 753 | "iter 77599, ep 387, score -130.616305, steps 200\n", 754 | "iter 77799, ep 388, score -132.142333, steps 200\n", 755 | "iter 77999, ep 389, score -4.970126, steps 200\n", 756 | "iter 78199, ep 390, score -4.525809, steps 200\n", 757 | "iter 78399, ep 391, score -349.493168, steps 200\n", 758 | "iter 78599, ep 392, score -4.509693, steps 200\n", 759 | "iter 78799, ep 393, score -120.290129, steps 200\n", 760 | "iter 78999, ep 394, score -124.072395, steps 200\n", 761 | "iter 79199, ep 395, score -117.970291, steps 200\n", 762 | "iter 79399, ep 396, score -119.131752, steps 200\n", 763 | "iter 79599, ep 397, score -123.907668, steps 200\n", 764 | "iter 79799, ep 398, score -127.259068, steps 200\n", 765 | "iter 79999, ep 399, score -272.045071, steps 200\n", 766 | "iter 80199, ep 400, score -122.568052, steps 200\n", 767 | "iter 80399, ep 401, score -128.302822, steps 200\n", 768 | "iter 80599, ep 402, score -242.071540, steps 200\n", 769 | "iter 80799, ep 403, score -236.555885, steps 200\n", 770 | "iter 80999, ep 404, score -254.638892, steps 200\n", 771 | "iter 81199, ep 405, score -4.559032, steps 200\n", 772 | "iter 81399, ep 406, score -338.747843, steps 200\n", 773 | "iter 81599, ep 407, score -271.123363, steps 200\n", 774 | "iter 81799, ep 408, score -129.768551, steps 200\n", 775 | "iter 81999, ep 409, score -130.282053, steps 200\n", 776 | "iter 82199, ep 410, score -132.270137, steps 200\n", 777 | "iter 82399, ep 411, score -238.836504, steps 200\n", 778 | "iter 82599, ep 412, score -122.466685, steps 200\n", 779 | "iter 82799, ep 413, score -338.361574, steps 200\n", 780 | "iter 82999, ep 414, score -131.548065, steps 200\n", 781 | "iter 83199, ep 415, score -133.290980, steps 200\n", 782 | "iter 83399, ep 416, score -134.987242, steps 200\n", 783 | "iter 83599, ep 417, score -134.078996, steps 200\n", 784 | "iter 83799, ep 418, score -126.560450, steps 200\n", 785 | "iter 83999, ep 419, score -124.045671, steps 200\n", 
786 | "iter 84199, ep 420, score -252.840873, steps 200\n", 787 | "iter 84399, ep 421, score -119.901363, steps 200\n", 788 | "iter 84599, ep 422, score -248.463898, steps 200\n", 789 | "iter 84799, ep 423, score -259.310671, steps 200\n", 790 | "iter 84999, ep 424, score -5.879516, steps 200\n", 791 | "iter 85199, ep 425, score -125.992997, steps 200\n", 792 | "iter 85399, ep 426, score -130.772884, steps 200\n", 793 | "iter 85599, ep 427, score -133.960069, steps 200\n", 794 | "iter 85799, ep 428, score -128.748451, steps 200\n", 795 | "iter 85999, ep 429, score -5.323233, steps 200\n", 796 | "iter 86199, ep 430, score -125.930848, steps 200\n", 797 | "iter 86399, ep 431, score -131.249564, steps 200\n", 798 | "iter 86599, ep 432, score -121.955848, steps 200\n", 799 | "iter 86799, ep 433, score -126.726698, steps 200\n", 800 | "iter 86999, ep 434, score -129.092343, steps 200\n", 801 | "iter 87199, ep 435, score -120.472004, steps 200\n", 802 | "iter 87399, ep 436, score -123.544131, steps 200\n", 803 | "iter 87599, ep 437, score -253.988645, steps 200\n", 804 | "iter 87799, ep 438, score -123.497215, steps 200\n", 805 | "iter 87999, ep 439, score -3.045889, steps 200\n", 806 | "iter 88199, ep 440, score -120.507718, steps 200\n", 807 | "iter 88399, ep 441, score -240.431019, steps 200\n", 808 | "iter 88599, ep 442, score -3.009580, steps 200\n", 809 | "iter 88799, ep 443, score -307.113333, steps 200\n", 810 | "iter 88999, ep 444, score -127.279165, steps 200\n", 811 | "iter 89199, ep 445, score -122.846385, steps 200\n", 812 | "iter 89399, ep 446, score -124.607087, steps 200\n", 813 | "iter 89599, ep 447, score -238.558577, steps 200\n", 814 | "iter 89799, ep 448, score -233.133597, steps 200\n", 815 | "iter 89999, ep 449, score -122.499911, steps 200\n", 816 | "iter 90199, ep 450, score -234.558908, steps 200\n", 817 | "iter 90399, ep 451, score -322.962568, steps 200\n", 818 | "iter 90599, ep 452, score -231.654030, steps 200\n", 819 | "iter 90799, ep 453, score -124.538077, steps 200\n", 820 | "iter 90999, ep 454, score -239.473319, steps 200\n", 821 | "iter 91199, ep 455, score -267.782583, steps 200\n", 822 | "iter 91399, ep 456, score -301.449482, steps 200\n", 823 | "iter 91599, ep 457, score -119.068361, steps 200\n", 824 | "iter 91799, ep 458, score -123.199646, steps 200\n", 825 | "iter 91999, ep 459, score -119.289888, steps 200\n", 826 | "iter 92199, ep 460, score -344.746766, steps 200\n", 827 | "iter 92399, ep 461, score -131.218601, steps 200\n", 828 | "iter 92599, ep 462, score -124.328458, steps 200\n", 829 | "iter 92799, ep 463, score -242.595299, steps 200\n", 830 | "iter 92999, ep 464, score -118.777685, steps 200\n", 831 | "iter 93199, ep 465, score -124.753331, steps 200\n", 832 | "iter 93399, ep 466, score -125.403707, steps 200\n", 833 | "iter 93599, ep 467, score -354.711999, steps 200\n", 834 | "iter 93799, ep 468, score -123.824846, steps 200\n", 835 | "iter 93999, ep 469, score -128.554761, steps 200\n", 836 | "iter 94199, ep 470, score -129.176413, steps 200\n", 837 | "iter 94399, ep 471, score -121.574554, steps 200\n", 838 | "iter 94599, ep 472, score -116.763944, steps 200\n", 839 | "iter 94799, ep 473, score -125.687042, steps 200\n", 840 | "iter 94999, ep 474, score -2.600989, steps 200\n", 841 | "iter 95199, ep 475, score -252.568422, steps 200\n", 842 | "iter 95399, ep 476, score -261.405147, steps 200\n", 843 | "iter 95599, ep 477, score -257.657228, steps 200\n", 844 | "iter 95799, ep 478, score -119.638925, steps 200\n", 845 | "iter 95999, ep 
479, score -126.184898, steps 200\n", 846 | "iter 96199, ep 480, score -5.295040, steps 200\n", 847 | "iter 96399, ep 481, score -262.848053, steps 200\n", 848 | "iter 96599, ep 482, score -276.019287, steps 200\n", 849 | "iter 96799, ep 483, score -118.643215, steps 200\n", 850 | "iter 96999, ep 484, score -133.585187, steps 200\n", 851 | "iter 97199, ep 485, score -132.039005, steps 200\n", 852 | "iter 97399, ep 486, score -121.752587, steps 200\n", 853 | "iter 97599, ep 487, score -259.833304, steps 200\n", 854 | "iter 97799, ep 488, score -128.846781, steps 200\n", 855 | "iter 97999, ep 489, score -369.836613, steps 200\n", 856 | "iter 98199, ep 490, score -116.918012, steps 200\n", 857 | "iter 98399, ep 491, score -123.747513, steps 200\n", 858 | "iter 98599, ep 492, score -4.510867, steps 200\n", 859 | "iter 98799, ep 493, score -121.577638, steps 200\n", 860 | "iter 98999, ep 494, score -120.418638, steps 200\n", 861 | "iter 99199, ep 495, score -351.277637, steps 200\n", 862 | "iter 99399, ep 496, score -118.680903, steps 200\n", 863 | "iter 99599, ep 497, score -118.259936, steps 200\n", 864 | "iter 99799, ep 498, score -351.153567, steps 200\n", 865 | "iter 99999, ep 499, score -244.595728, steps 200\n" 866 | ] 867 | }, 868 | { 869 | "name": "stderr", 870 | "output_type": "stream", 871 | "text": [ 872 | "[2017-08-26 21:34:01,853] Finished writing results. You can upload them to the scoreboard via gym.upload('/Users/winter/Google Drive/handson-ml/tmp')\n" 873 | ] 874 | } 875 | ], 876 | "source": [ 877 | "import gym\n", 878 | "from gym import wrappers\n", 879 | "max_episode = 500\n", 880 | "gamma = 0.99\n", 881 | "tau = 0.001\n", 882 | "memory_size = 10000\n", 883 | "batch_size = 256\n", 884 | "memory_warmup = batch_size*3\n", 885 | "max_explore_eps = 100\n", 886 | "save_path = 'DDPG_net_Class.ckpt'\n", 887 | "\n", 888 | "tf.reset_default_graph()\n", 889 | "actorAsync = AsyncNets('Actor')\n", 890 | "actor,actor_target = actorAsync.get_subnets()\n", 891 | "criticAsync = AsyncNets('Critic')\n", 892 | "critic,critic_target = criticAsync.get_subnets()\n", 893 | "\n", 894 | "init = tf.global_variables_initializer()\n", 895 | "saver = tf.train.Saver()\n", 896 | "with tf.Session() as sess:\n", 897 | " init.run()\n", 898 | " actorAsync.set_session(sess)\n", 899 | " criticAsync.set_session(sess)\n", 900 | " env = gym.make('Pendulum-v0')\n", 901 | " env = wrappers.Monitor(env,'./tmp/',force=True)\n", 902 | " obs = env.reset()\n", 903 | " iteration = 0\n", 904 | " episode = 0\n", 905 | " episode_score = 0\n", 906 | " episode_steps = 0\n", 907 | " noise = UONoise()\n", 908 | " memory = Memory(memory_size)\n", 909 | " while episode < max_episode:\n", 910 | " print('\\riter {}, ep {}'.format(iteration,episode),end='')\n", 911 | " action = actor.predict_action(np.reshape(obs,[1,-1]))[0]\n", 912 | " if episode= memory_warmup:\n", 919 | " memory_batch = memory.sample_batch(batch_size)\n", 920 | " extract_mem = lambda k : np.array([item[k] for item in memory_batch])\n", 921 | " obs_batch = extract_mem(0)\n", 922 | " action_batch = extract_mem(1)\n", 923 | " reward_batch = extract_mem(2)\n", 924 | " next_obs_batch = extract_mem(3)\n", 925 | " done_batch = extract_mem(4)\n", 926 | " action_next = actor_target.predict_action(next_obs_batch)\n", 927 | " Q_next = critic_target.predict_Q(next_obs_batch,action_next)[:,0]\n", 928 | " Qexpected_batch = reward_batch + gamma*(1-done_batch)*Q_next # target Q value\n", 929 | " Qexpected_batch = np.reshape(Qexpected_batch,[-1,1])\n", 930 | " # train critic\n", 
931 | " critic.train(obs_batch,action_batch,Qexpected_batch)\n", 932 | " # train actor\n", 933 | " action_grads = critic.compute_action_grads(obs_batch,action_batch)\n", 934 | " actor.train(obs_batch,action_grads)\n", 935 | " # async update\n", 936 | " actorAsync.async_update(tau)\n", 937 | " criticAsync.async_update(tau)\n", 938 | " episode_score += reward\n", 939 | " episode_steps += 1\n", 940 | " iteration += 1\n", 941 | " if done:\n", 942 | " print(', score {:8f}, steps {}'.format(episode_score,episode_steps))\n", 943 | "# if episode%5 == 0:\n", 944 | " \n", 945 | "# Q_check = \n", 946 | " obs = env.reset()\n", 947 | " episode += 1\n", 948 | " episode_score = 0\n", 949 | " episode_steps = 0\n", 950 | " noise = UONoise()\n", 951 | " if episode%100==0:\n", 952 | " saver.save(sess,save_path)\n", 953 | " else:\n", 954 | " obs = next_obs\n", 955 | "env.close()" 956 | ] 957 | }, 958 | { 959 | "cell_type": "code", 960 | "execution_count": 9, 961 | "metadata": { 962 | "collapsed": false 963 | }, 964 | "outputs": [ 965 | { 966 | "name": "stderr", 967 | "output_type": "stream", 968 | "text": [ 969 | "[2017-08-26 21:34:01,863] [Pendulum-v0] Uploading 500 episodes of training data\n", 970 | "[2017-08-26 21:34:03,713] [Pendulum-v0] Uploading videos of 8 training episodes (628501 bytes)\n", 971 | "[2017-08-26 21:34:05,040] [Pendulum-v0] Creating evaluation object from ./tmp/ with learning curve and training video\n", 972 | "[2017-08-26 21:34:05,260] \n", 973 | "****************************************************\n", 974 | "You successfully uploaded your evaluation on Pendulum-v0 to\n", 975 | "OpenAI Gym! You can find it at:\n", 976 | "\n", 977 | " https://gym.openai.com/evaluations/eval_ZVyGQYhVTb67h0Vu6UtOYQ\n", 978 | "\n", 979 | "****************************************************\n" 980 | ] 981 | } 982 | ], 983 | "source": [ 984 | "gym.upload('./tmp/', api_key='sk_BlwjttPKR6ZsXVrObENYA')" 985 | ] 986 | } 987 | ], 988 | "metadata": { 989 | "anaconda-cloud": {}, 990 | "kernelspec": { 991 | "display_name": "Python [conda env:tensorflow]", 992 | "language": "python", 993 | "name": "conda-env-tensorflow-py" 994 | }, 995 | "language_info": { 996 | "codemirror_mode": { 997 | "name": "ipython", 998 | "version": 3 999 | }, 1000 | "file_extension": ".py", 1001 | "mimetype": "text/x-python", 1002 | "name": "python", 1003 | "nbconvert_exporter": "python", 1004 | "pygments_lexer": "ipython3", 1005 | "version": "3.6.2" 1006 | } 1007 | }, 1008 | "nbformat": 4, 1009 | "nbformat_minor": 2 1010 | } 1011 | --------------------------------------------------------------------------------