├── README.md ├── week4_practice_approx_qlearning.ipynb ├── week5_practice_reinforce.ipynb ├── week3_experience_replay.ipynb ├── week3_sarsa.ipynb ├── bandits.ipynb └── practice_mcts.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Practical-Reinforcement-Learning---Coursera-Advanced-Machine-Learning -------------------------------------------------------------------------------- /week4_practice_approx_qlearning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Approximate q-learning\n", 8 | "\n", 9 | "In this notebook you will teach a __tensorflow__ neural network to do Q-learning." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "__Frameworks__ - we'll accept this homework in any deep learning framework. This particular notebook was designed for tensorflow, but you will find it easy to adapt it to almost any python-based deep learning framework." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Starting virtual X frame buffer: Xvfb.\n", 29 | "env: DISPLAY=:1\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "#XVFB will be launched if you run on a server\n", 35 | "import os\n", 36 | "if os.environ.get(\"DISPLAY\") is not str or len(os.environ.get(\"DISPLAY\"))==0:\n", 37 | " !bash ../xvfb start\n", 38 | " %env DISPLAY=:1" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import gym\n", 50 | "import numpy as np\n", 51 | "import pandas as pd\n", 52 | "import matplotlib.pyplot as plt\n", 53 | "%matplotlib inline" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": { 60 | "scrolled": false 61 | }, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "\u001b[33mWARN: gym.spaces.Box autodetected dtype as . 
Please provide explicit dtype.\u001b[0m\n", 68 | "\n" 69 | ] 70 | }, 71 | { 72 | "data": { 73 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD8CAYAAAB9y7/cAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAEp1JREFUeJzt3X+s3fV93/Hnq5hAlmQ1hAvybDOT\nxl1Dp8XQO+KIaaKQtsCqmkrNBK0aFCFdJhEpUaOt0ElrIg2pldawRetQ3ELjVFkII8mwEG1KHaIq\nfwRiJ45j41BuEie+tYfNAiRZNDaT9/44nxvOzPG9x/eHr+8nz4d0dL7fz/dzvuf9CSev+72f+/34\npKqQJPXnp1a6AEnS8jDgJalTBrwkdcqAl6ROGfCS1CkDXpI6tWwBn+T6JE8nmU5y53K9jyRptCzH\nffBJzgH+FvglYAb4EnBLVT215G8mSRppua7grwKmq+qbVfV/gAeAbcv0XpKkEdYs03nXA4eH9meA\nt52q80UXXVSbNm1aplIkafU5dOgQzz33XBZzjuUK+FFF/X9zQUmmgCmASy+9lN27dy9TKZK0+kxO\nTi76HMs1RTMDbBza3wAcGe5QVdurarKqJicmJpapDEn6ybVcAf8lYHOSy5K8BrgZ2LlM7yVJGmFZ\npmiq6kSS9wCfBc4B7q+qA8vxXpKk0ZZrDp6qehR4dLnOL0mamytZJalTBrwkdcqAl6ROGfCS1CkD\nXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAl\nqVMGvCR1alFf2ZfkEPB94GXgRFVNJrkQ+CSwCTgE/Muqen5xZUqSTtdSXMH/YlVtqarJtn8nsKuq\nNgO72r4k6QxbjimabcCOtr0DuGkZ3kOSNI/FBnwBf5VkT5Kp1nZJVR0FaM8XL/I9JEkLsKg5eODq\nqjqS5GLgsSRfH/eF7QfCFMCll166yDIkSSdb1BV8VR1pz8eAzwBXAc8mWQfQno+d4rXbq2qyqiYn\nJiYWU4YkaYQFB3yS1yV5w+w28MvAfmAncGvrdivw8GKLlCSdvsVM0VwCfCbJ7Hn+a1X9ZZIvAQ8m\nuQ34DvDOxZcpSTpdCw74qvom8NYR7f8TuG4xRUmSFs+VrJLUKQNekjplwEtSpwx4SeqUAS9JnTLg\nJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16S\nOmXAS1Kn5g34JPcnOZZk/1DbhUkeS/JMe76gtSfJh5NMJ9mX5MrlLF6SdGrjXMF/FLj+pLY7gV1V\ntRnY1fYBbgA2t8cUcO/SlClJOl3zBnxV/Q3w3ZOatwE72vYO4Kah9o/VwBeBtUnWLVWxkqTxLXQO\n/pKqOgrQni9u7euBw0P9ZlrbqySZSrI7ye7jx48vsAxJ0qks9R9ZM6KtRnWsqu1VNVlVkxMTE0tc\nhiRpoQH/7OzUS3s+1tpngI1D/TYARxZeniRpoRYa8DuBW9v2rcDDQ+3vanfTbAVenJ3KkSSdWWvm\n65DkE8A1wEVJZoDfB/4AeDDJbcB3gHe27o8CNwLTwA+Bdy9DzZKkMcwb8FV1yykOXTeibwF3LLYo\nSdLiuZJVkjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEv\nSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1Kn5g34JPcnOZZk/1DbB5L8XZK97XHj0LG7kkwn\neTrJryxX4ZKkuY1zBf
9R4PoR7fdU1Zb2eBQgyeXAzcDPt9f8lyTnLFWxkqTxzRvwVfU3wHfHPN82\n4IGqeqmqvgVMA1ctoj5J0gItZg7+PUn2tSmcC1rbeuDwUJ+Z1vYqSaaS7E6y+/jx44soQ5I0ykID\n/l7gZ4AtwFHgj1p7RvStUSeoqu1VNVlVkxMTEwssQ5J0KgsK+Kp6tqperqofAX/CK9MwM8DGoa4b\ngCOLK1GStBALCvgk64Z2fx2YvcNmJ3BzkvOSXAZsBp5cXImSpIVYM1+HJJ8ArgEuSjID/D5wTZIt\nDKZfDgG3A1TVgSQPAk8BJ4A7qurl5SldkjSXeQO+qm4Z0XzfHP3vBu5eTFGSpMVzJaskdcqAl6RO\nGfCS1CkDXpI6ZcBLUqcMeEnq1Ly3SUo/ifZsv/1Vbb8w9ZEVqERaOK/gJalTBrwkdcqAl6ROGfCS\n1CkDXpI6ZcBLUqcMeEnqlAEvncR74NULA16SOmXAS1KnDHhJ6tS8AZ9kY5LHkxxMciDJe1v7hUke\nS/JMe76gtSfJh5NMJ9mX5MrlHoQk6dXGuYI/Aby/qt4CbAXuSHI5cCewq6o2A7vaPsANwOb2mALu\nXfKqJUnzmjfgq+poVX25bX8fOAisB7YBO1q3HcBNbXsb8LEa+CKwNsm6Ja9ckjSn05qDT7IJuAJ4\nArikqo7C4IcAcHHrth44PPSymdZ28rmmkuxOsvv48eOnX7kkaU5jB3yS1wOfAt5XVd+bq+uItnpV\nQ9X2qpqsqsmJiYlxy5AkjWmsgE9yLoNw/3hVfbo1Pzs79dKej7X2GWDj0Ms3AEeWplxJ0rjGuYsm\nwH3Awar60NChncCtbftW4OGh9ne1u2m2Ai/OTuVIks6ccb6y72rgt4GvJdnb2n4P+APgwSS3Ad8B\n3tmOPQrcCEwDPwTevaQVS5LGMm/AV9UXGD2vDnDdiP4F3LHIuiRJi+RKVknqlAEvDRn1L0lKq5UB\nL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfDS\nPH5h6iMrXYK0IAa8JHXKgJekThnwktSpcb50e2OSx5McTHIgyXtb+weS/F2Sve1x49Br7koyneTp\nJL+ynAOQJI02zpdunwDeX1VfTvIGYE+Sx9qxe6rqPwx3TnI5cDPw88A/AP46yc9W1ctLWbgkaW7z\nXsFX1dGq+nLb/j5wEFg/x0u2AQ9U1UtV9S1gGrhqKYqVJI3vtObgk2wCrgCeaE3vSbIvyf1JLmht\n64HDQy+bYe4fCJKkZTB2wCd5PfAp4H1V9T3gXuBngC3AUeCPZruOeHmNON9Ukt1Jdh8/fvy0C5ck\nzW2sgE9yLoNw/3hVfRqgqp6tqper6kfAn/DKNMwMsHHo5RuAIyefs6q2V9VkVU1OTEwsZgySpBHG\nuYsmwH3Awar60FD7uqFuvw7sb9s7gZuTnJfkMmAz8OTSlSxJGsc4d9FcDfw28LUke1vb7wG3JNnC\nYPrlEHA7QFUdSPIg8BSDO3Du8A4aSTrz5g34qvoCo+fVH53jNXcDdy+iLknSIrmSVWr2bL99pUuQ\nlpQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6RO\nGfDqWpKxH4s9h3S2MeClOUzevn2lS5AWbJwv/JB+YjxydOrH27+6znDX6uYVvNQMh/uofWm1MeAl\nqVPjfOn2+UmeTPLVJAeSfLC1X5bkiSTPJPlkkte09vPa/nQ7vml5hyBJGmWcK/iXgGur6q3AFuD6\nJFuBPwTuqarNwPPAba3/bcDzVfVm4J7WTzrrnTzn7hy8VrtxvnS7gB+03XPbo4Brgd9s7TuADwD3\nAtvaNsBDwH9OknYe6aw1uGPmlVD/wIpVIi2Nse6iSXIOsAd4M/DHwDeAF6rqROsyA6xv
2+uBwwBV\ndSLJi8AbgedOdf49e/Z4H7FWPT/DOtuMFfBV9TKwJcla4DPAW0Z1a8+jPuWvunpPMgVMAVx66aV8\n+9vfHqtg6XScydD1l1QtpcnJyUWf47TuoqmqF4DPA1uBtUlmf0BsAI607RlgI0A7/tPAd0eca3tV\nTVbV5MTExMKqlySd0jh30Uy0K3eSvBZ4B3AQeBz4jdbtVuDhtr2z7dOOf875d0k688aZolkH7Gjz\n8D8FPFhVjyR5Cnggyb8HvgLc1/rfB/x5kmkGV+43L0PdkqR5jHMXzT7gihHt3wSuGtH+v4F3Lkl1\nkqQFcyWrJHXKgJekThnwktQp/7lgdc0buPSTzCt4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkD\nXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktSpcb50+/wkTyb5apIDST7Y2j+a5FtJ\n9rbHltaeJB9OMp1kX5Irl3sQkqRXG+ffg38JuLaqfpDkXOALSf6iHfvXVfXQSf1vADa3x9uAe9uz\nJOkMmvcKvgZ+0HbPbY+5vkVhG/Cx9rovAmuTrFt8qZKk0zHWHHySc5LsBY4Bj1XVE+3Q3W0a5p4k\n57W29cDhoZfPtDZJ0hk0VsBX1ctVtQXYAFyV5B8DdwE/B/xT4ELgd1v3jDrFyQ1JppLsTrL7+PHj\nCypeknRqp3UXTVW9AHweuL6qjrZpmJeAPwOuat1mgI1DL9sAHBlxru1VNVlVkxMTEwsqXpJ0auPc\nRTORZG3bfi3wDuDrs/PqSQLcBOxvL9kJvKvdTbMVeLGqji5L9ZKkUxrnLpp1wI4k5zD4gfBgVT2S\n5HNJJhhMyewF/lXr/yhwIzAN/BB499KXLUmaz7wBX1X7gCtGtF97iv4F3LH40iRJi+FKVknqlAEv\nSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLU\nKQNekjplwEtSpwx4SeqUAS9JnTLgJalTYwd8knOSfCXJI23/siRPJHkmySeTvKa1n9f2p9vxTctT\nuiRpLqdzBf9e4ODQ/h8C91TVZuB54LbWfhvwfFW9Gbin9ZMknWFjBXySDcC/AP607Qe4FnioddkB\n3NS2t7V92vHrWn9J0hm0Zsx+/xH4N8Ab2v4bgReq6kTbnwHWt+31wGGAqjqR5MXW/7nhEyaZAqba\n7ktJ9i9oBGe/izhp7J3odVzQ79gc1+ryD5NMVdX2hZ5g3oBP8qvAsarak+Sa2eYRXWuMY680DIre\n3t5jd1VNjlXxKtPr2HodF/Q7Nse1+iTZTcvJhRjnCv5q4NeS3AicD/x9Blf0a5OsaVfxG4Ajrf8M\nsBGYSbIG+GnguwstUJK0MPPOwVfVXVW1oao2ATcDn6uq3wIeB36jdbsVeLht72z7tOOfq6pXXcFL\nkpbXYu6D/13gd5JMM5hjv6+13we8sbX/DnDnGOda8K8gq0CvY+t1XNDv2BzX6rOoscWLa0nqkytZ\nJalTKx7wSa5P8nRb+TrOdM5ZJcn9SY4N3+aZ5MIkj7VVvo8luaC1J8mH21j3Jbly5SqfW5KNSR5P\ncjDJgSTvbe2remxJzk/yZJKvtnF9sLV3sTK71xXnSQ4l+VqSve3OklX/WQRIsjbJQ0m+3v6/9val\nHNeKBnySc4A/Bm4ALgduSXL5Sta0AB8Frj+p7U5gV1vlu4tX/g5xA7C5PaaAe89QjQtxAnh/Vb0F\n2Arc0f7brPaxvQRcW1VvBbYA1yfZSj8rs3tecf6LVbVl6JbI1f5ZBPhPwF9W1c8Bb2Xw327pxlVV\nK/YA3g58dmj/LuCulaxpgePYBOwf2n8aWNe21wFPt+2PALeM6ne2PxjcJfVLPY0N+HvAl4G3MVgo\ns6a1//hzCXwWeHvbXtP6ZaVrP8V4NrRAuBZ4hMGa
lFU/rlbjIeCik9pW9WeRwS3n3zr5f/elHNdK\nT9H8eNVrM7widjW7pKqOArTni1v7qhxv+/X9CuAJOhhbm8bYCxwDHgO+wZgrs4HZldlno9kV5z9q\n+2OvOOfsHhcMFkv+VZI9bRU8rP7P4puA48CftWm1P03yOpZwXCsd8GOteu3IqhtvktcDnwLeV1Xf\nm6vriLazcmxV9XJVbWFwxXsV8JZR3drzqhhXhlacDzeP6LqqxjXk6qq6ksE0xR1J/vkcfVfL2NYA\nVwL3VtUVwP9i7tvKT3tcKx3ws6teZw2viF3Nnk2yDqA9H2vtq2q8Sc5lEO4fr6pPt+YuxgZQVS8A\nn2fwN4a1beU1jF6ZzVm+Mnt2xfkh4AEG0zQ/XnHe+qzGcQFQVUfa8zHgMwx+MK/2z+IMMFNVT7T9\nhxgE/pKNa6UD/kvA5vaX/tcwWCm7c4VrWgrDq3lPXuX7rvbX8K3Ai7O/ip1tkoTBorWDVfWhoUOr\nemxJJpKsbduvBd7B4A9bq3pldnW84jzJ65K8YXYb+GVgP6v8s1hV/wM4nOQftabrgKdYynGdBX9o\nuBH4WwbzoP92petZQP2fAI4C/5fBT9jbGMxl7gKeac8Xtr5hcNfQN4CvAZMrXf8c4/pnDH792wfs\nbY8bV/vYgH8CfKWNaz/w71r7m4AngWngvwHntfbz2/50O/6mlR7DGGO8Bnikl3G1MXy1PQ7M5sRq\n/yy2WrcAu9vn8b8DFyzluFzJKkmdWukpGknSMjHgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcM\neEnq1P8DSYuHzA27mzcAAAAASUVORK5CYII=\n", 74 | "text/plain": [ 75 | "
" 76 | ] 77 | }, 78 | "metadata": {}, 79 | "output_type": "display_data" 80 | } 81 | ], 82 | "source": [ 83 | "env = gym.make(\"CartPole-v0\").env\n", 84 | "env.reset()\n", 85 | "n_actions = env.action_space.n\n", 86 | "state_dim = env.observation_space.shape\n", 87 | "\n", 88 | "plt.imshow(env.render(\"rgb_array\"))\n", 89 | "print(type(env.action_space))" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "# Approximate (deep) Q-learning: building the network\n", 97 | "\n", 98 | "To train a neural network policy one must have a neural network policy. Let's build it.\n", 99 | "\n", 100 | "\n", 101 | "Since we're working with a pre-extracted features (cart positions, angles and velocities), we don't need a complicated network yet. In fact, let's build something like this for starters:\n", 102 | "\n", 103 | "![img](https://s14.postimg.org/uzay2q5rl/qlearning_scheme.png)\n", 104 | "\n", 105 | "For your first run, please only use linear layers (L.Dense) and activations. Stuff like batch normalization or dropout may ruin everything if used haphazardly. \n", 106 | "\n", 107 | "Also please avoid using nonlinearities like sigmoid & tanh: agent's observations are not normalized so sigmoids may become saturated from init.\n", 108 | "\n", 109 | "Ideally you should start small with maybe 1-2 hidden layers with < 200 neurons and then increase network size if agent doesn't beat the target score." 
110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stderr", 119 | "output_type": "stream", 120 | "text": [ 121 | "Using TensorFlow backend.\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "import tensorflow as tf\n", 127 | "import keras\n", 128 | "import keras.layers as L\n", 129 | "tf.reset_default_graph()\n", 130 | "sess = tf.InteractiveSession()\n", 131 | "keras.backend.set_session(sess)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 5, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "network = keras.models.Sequential()\n", 143 | "network.add(L.InputLayer(state_dim))\n", 144 | "network.add(L.Dense(150, activation='relu'))\n", 145 | "network.add(L.Dense(200, activation='relu'))\n", 146 | "network.add(L.Dense(n_actions, activation='linear'))\n", 147 | "# let's create a network for approximate q-learning following guidelines above" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 6, 153 | "metadata": { 154 | "collapsed": true 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "def get_action(state, epsilon=0):\n", 159 | " \"\"\"\n", 160 | " sample actions with epsilon-greedy policy\n", 161 | " recap: with p = epsilon pick random action, else pick action with highest Q(s,a)\n", 162 | " \"\"\"\n", 163 | " \n", 164 | " q_values = network.predict(state[None])[0]\n", 165 | " val = np.random.uniform(0,1)\n", 166 | " if val <= epsilon:\n", 167 | " action = np.random.choice(n_actions, 1)[0]\n", 168 | " else:\n", 169 | " action = np.argmax(q_values)\n", 170 | " ###YOUR CODE\n", 171 | "\n", 172 | " return action\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "e=0.0 tests passed\n", 185 | "e=0.1 tests passed\n", 
186 | "e=0.5 tests passed\n", 187 | "e=1.0 tests passed\n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "assert network.output_shape == (None, n_actions), \"please make sure your model maps state s -> [Q(s,a0), ..., Q(s, a_last)]\"\n", 193 | "assert network.layers[-1].activation == keras.activations.linear, \"please make sure you predict q-values without nonlinearity\"\n", 194 | "\n", 195 | "# test epsilon-greedy exploration\n", 196 | "s = env.reset()\n", 197 | "assert np.shape(get_action(s)) == (), \"please return just one action (integer)\"\n", 198 | "for eps in [0., 0.1, 0.5, 1.0]:\n", 199 | " state_frequencies = np.bincount([get_action(s, epsilon=eps) for i in range(10000)], minlength=n_actions)\n", 200 | " best_action = state_frequencies.argmax()\n", 201 | " assert abs(state_frequencies[best_action] - 10000 * (1 - eps + eps / n_actions)) < 200\n", 202 | " for other_action in range(n_actions):\n", 203 | " if other_action != best_action:\n", 204 | " assert abs(state_frequencies[other_action] - 10000 * (eps / n_actions)) < 200\n", 205 | " print('e=%.1f tests passed'%eps)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "### Q-learning via gradient descent\n", 213 | "\n", 214 | "We shall now train our agent's Q-function by minimizing the TD loss:\n", 215 | "$$ L = { 1 \\over N} \\sum_i (Q_{\\theta}(s,a) - [r(s,a) + \\gamma \\cdot \\max_{a'} Q_{-}(s', a')]) ^2 $$\n", 216 | "\n", 217 | "\n", 218 | "Where\n", 219 | "* $s, a, r, s'$ are current state, action, reward and next state respectively\n", 220 | "* $\\gamma$ is the discount factor (`gamma`), defined in the code cells below.\n", 221 | "\n", 222 | "The tricky part is with $Q_{-}(s',a')$. From an engineering standpoint, it's the same as $Q_{\\theta}$ - the output of your neural network policy. 
However, when doing gradient descent, __we won't propagate gradients through it__ to make training more stable (see lectures).\n", 223 | "\n", 224 | "To do so, we shall use the `tf.stop_gradient` function, which basically says \"consider this thing constant when doing backprop\"." 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 8, 230 | "metadata": { 231 | "collapsed": true 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "# Create placeholders for the (s, a, r, s') tuple and a special indicator for game end (is_done = True)\n", 236 | "states_ph = keras.backend.placeholder(dtype='float32', shape=(None,) + state_dim)\n", 237 | "actions_ph = keras.backend.placeholder(dtype='int32', shape=[None])\n", 238 | "rewards_ph = keras.backend.placeholder(dtype='float32', shape=[None])\n", 239 | "next_states_ph = keras.backend.placeholder(dtype='float32', shape=(None,) + state_dim)\n", 240 | "is_done_ph = keras.backend.placeholder(dtype='bool', shape=[None])" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 9, 246 | "metadata": { 247 | "collapsed": true 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "#get q-values for all actions in current states\n", 252 | "predicted_qvalues = network(states_ph)\n", 253 | "\n", 254 | "#select q-values for chosen actions\n", 255 | "predicted_qvalues_for_actions = tf.reduce_sum(predicted_qvalues * tf.one_hot(actions_ph, n_actions), axis=1)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 10, 261 | "metadata": { 262 | "collapsed": true 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "gamma = 0.99\n", 267 | "\n", 268 | "# compute q-values for all actions in next states\n", 269 | "predicted_next_qvalues = network(next_states_ph)\n", 270 | "\n", 271 | "# compute V*(next_states) using predicted next q-values\n", 272 | "next_state_values = tf.reduce_max(predicted_next_qvalues, axis=1)\n", 273 | "\n", 274 | "# compute \"target q-values\" for loss - it's what's inside 
square parentheses in the above formula.\n", 275 | "target_qvalues_for_actions = rewards_ph + gamma*next_state_values\n", 276 | "\n", 277 | "# at the last state we shall use simplified formula: Q(s,a) = r(s,a) since s' doesn't exist\n", 278 | "target_qvalues_for_actions = tf.where(is_done_ph, rewards_ph, target_qvalues_for_actions)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 11, 284 | "metadata": { 285 | "collapsed": true 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "#mean squared error loss to minimize\n", 290 | "loss = (predicted_qvalues_for_actions - tf.stop_gradient(target_qvalues_for_actions)) ** 2\n", 291 | "loss = tf.reduce_mean(loss)\n", 292 | "\n", 293 | "# training function that resembles agent.update(state, action, reward, next_state) from tabular agent\n", 294 | "train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 12, 300 | "metadata": { 301 | "collapsed": true 302 | }, 303 | "outputs": [], 304 | "source": [ 305 | "assert tf.gradients(loss, [predicted_qvalues_for_actions])[0] is not None, \"make sure you update q-values for chosen actions and not just all actions\"\n", 306 | "assert tf.gradients(loss, [predicted_next_qvalues])[0] is None, \"make sure you don't propagate gradient w.r.t. 
Q_(s',a')\"\n", 307 | "assert predicted_next_qvalues.shape.ndims == 2, \"make sure you predicted q-values for all actions in next state\"\n", 308 | "assert next_state_values.shape.ndims == 1, \"make sure you computed V(s') as maximum over just the actions axis and not all axes\"\n", 309 | "assert target_qvalues_for_actions.shape.ndims == 1, \"there's something wrong with target q-values, they must be a vector\"" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "### Playing the game" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 13, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "def generate_session(t_max=1000, epsilon=0, train=False):\n", 328 | " \"\"\"play env with approximate q-learning agent and train it at the same time\"\"\"\n", 329 | " total_reward = 0\n", 330 | " s = env.reset()\n", 331 | " \n", 332 | " for t in range(t_max):\n", 333 | " a = get_action(s, epsilon=epsilon) \n", 334 | " next_s, r, done, _ = env.step(a)\n", 335 | " \n", 336 | " if train:\n", 337 | " sess.run(train_step,{\n", 338 | " states_ph: [s], actions_ph: [a], rewards_ph: [r], \n", 339 | " next_states_ph: [next_s], is_done_ph: [done]\n", 340 | " })\n", 341 | "\n", 342 | " total_reward += r\n", 343 | " s = next_s\n", 344 | " if done: break\n", 345 | " \n", 346 | " return total_reward" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": { 353 | "collapsed": true 354 | }, 355 | "outputs": [], 356 | "source": [ 357 | "epsilon = 0.5" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "scrolled": true 365 | }, 366 | "outputs": [ 367 | { 368 | "name": "stdout", 369 | "output_type": "stream", 370 | "text": [ 371 | "epoch #0\tmean reward = 14.050\t validation reward = 9.480\t epsilon = 0.500\n", 372 | "epoch #1\tmean reward = 13.750\t validation reward = 
13.320\t epsilon = 0.495\n", 373 | "epoch #2\tmean reward = 13.050\t validation reward = 9.510\t epsilon = 0.490\n", 374 | "epoch #3\tmean reward = 13.580\t validation reward = 9.430\t epsilon = 0.485\n", 375 | "epoch #4\tmean reward = 15.880\t validation reward = 9.340\t epsilon = 0.480\n", 376 | "epoch #5\tmean reward = 14.690\t validation reward = 10.800\t epsilon = 0.475\n", 377 | "epoch #6\tmean reward = 16.330\t validation reward = 12.580\t epsilon = 0.471\n", 378 | "epoch #7\tmean reward = 26.420\t validation reward = 22.160\t epsilon = 0.466\n", 379 | "epoch #8\tmean reward = 23.110\t validation reward = 11.800\t epsilon = 0.461\n", 380 | "epoch #9\tmean reward = 29.330\t validation reward = 37.650\t epsilon = 0.457\n", 381 | "epoch #10\tmean reward = 51.820\t validation reward = 22.300\t epsilon = 0.452\n", 382 | "epoch #11\tmean reward = 42.890\t validation reward = 45.000\t epsilon = 0.448\n" 383 | ] 384 | } 385 | ], 386 | "source": [ 387 | "for i in range(1000):\n", 388 | " session_rewards = [generate_session(epsilon=epsilon, train=True) for _ in range(100)]\n", 389 | " validation_rewards = [generate_session() for _ in range(100)]\n", 390 | " print(\"epoch #{}\\tmean reward = {:.3f}\\t validation reward = {:.3f}\\t epsilon = {:.3f}\".format(i, np.mean(session_rewards), np.mean(validation_rewards), epsilon))\n", 391 | " \n", 392 | " epsilon *= 0.99\n", 393 | " assert epsilon >= 1e-4, \"Make sure epsilon is always nonzero during training\"\n", 394 | " \n", 395 | " if np.mean(validation_rewards) > 300:\n", 396 | " print (\"You Win!\")\n", 397 | " break\n" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "### How to interpret results\n", 405 | "\n", 406 | "\n", 407 | "Welcome to the f.. world of deep f...n reinforcement learning. Don't expect the agent's reward to go up smoothly. Hope for it to increase eventually. 
If it deems you worthy.\n", 408 | "\n", 409 | "Seriously though,\n", 410 | "* __mean reward__ is the average reward per game. For a correct implementation it may stay low for some 10 epochs, then start growing while oscillating insanely and converge by ~50-100 steps depending on the network architecture. \n", 411 | "* If it never reaches the target score by the end of the for loop, try increasing the number of hidden neurons or look at the epsilon.\n", 412 | "* __epsilon__ - the agent's willingness to explore. If you see that the agent is already at < 0.01 epsilon before its mean reward is at least 200, just reset it back to 0.1 - 0.5." 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "### Record videos\n", 420 | "\n", 421 | "As usual, we now use `gym.wrappers.Monitor` to record a video of our agent playing the game. Unlike our previous attempts with state binarization, this time we expect our agent to act ~~(or fail)~~ more smoothly since there's no more binarization error at play.\n", 422 | "\n", 423 | "As you already did with tabular q-learning, we set epsilon=0 for final evaluation to prevent the agent from exploring itself to death." 
424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": true, 431 | "scrolled": true 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "#record sessions\n", 436 | "import gym.wrappers\n", 437 | "env = gym.wrappers.Monitor(gym.make(\"CartPole-v0\"),directory=\"videos\",force=True)\n", 438 | "sessions = [generate_session(epsilon=0, train=False) for _ in range(100)]\n", 439 | "env.close()\n" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": { 446 | "collapsed": true 447 | }, 448 | "outputs": [], 449 | "source": [ 450 | "#show video\n", 451 | "from IPython.display import HTML\n", 452 | "import os\n", 453 | "\n", 454 | "video_names = list(filter(lambda s:s.endswith(\".mp4\"),os.listdir(\"./videos/\")))\n", 455 | "\n", 456 | "HTML(\"\"\"\n", 457 | "\n", 460 | "\"\"\".format(\"./videos/\"+video_names[-1])) #this may or may not be _last_ video. Try other indices" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": { 467 | "collapsed": true 468 | }, 469 | "outputs": [], 470 | "source": [] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "metadata": {}, 475 | "source": [ 476 | "---" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": { 482 | "collapsed": true 483 | }, 484 | "source": [ 485 | "### Submit to coursera" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": { 492 | "collapsed": true 493 | }, 494 | "outputs": [], 495 | "source": [ 496 | "from submit import submit_cartpole\n", 497 | "submit_cartpole(generate_session, \"matcha.11@samsung.com\", \"eIiIF0CAbVmILTuP\")" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": { 504 | "collapsed": true 505 | }, 506 | "outputs": [], 507 | "source": [] 508 | } 509 | ], 510 | "metadata": { 511 | "anaconda-cloud": {}, 512 | "kernelspec": { 
513 | "display_name": "Python 3", 514 | "language": "python", 515 | "name": "python3" 516 | }, 517 | "language_info": { 518 | "codemirror_mode": { 519 | "name": "ipython", 520 | "version": 3 521 | }, 522 | "file_extension": ".py", 523 | "mimetype": "text/x-python", 524 | "name": "python", 525 | "nbconvert_exporter": "python", 526 | "pygments_lexer": "ipython3", 527 | "version": "3.6.2" 528 | } 529 | }, 530 | "nbformat": 4, 531 | "nbformat_minor": 1 532 | } 533 | -------------------------------------------------------------------------------- /week5_practice_reinforce.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# REINFORCE in TensorFlow\n", 8 | "\n", 9 | "This notebook implements a basic reinforce algorithm a.k.a. policy gradient for CartPole env.\n", 10 | "\n", 11 | "It has been deliberately written to be as simple and human-readable.\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "The notebook assumes that you have [openai gym](https://github.com/openai/gym) installed.\n", 19 | "\n", 20 | "In case you're running on a server, [use xvfb](https://github.com/openai/gym#rendering-on-a-server)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 54, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "" 32 | ] 33 | }, 34 | "execution_count": 54, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | }, 38 | { 39 | "data": { 40 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXgAAAD8CAYAAAB9y7/cAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEoxJREFUeJzt3XGs3eV93/H3ZzaBLMlqCBfk2WYmrbeGTouhd4SIaaKQtsCqmkrNBJsaFCFdJhEpUaOt0EkrkYbUSmvYom0obqFxpiyEkaRYiDVlhKjKH4FcEscxOBQnscKtPXyzAEkWjQ3y3R/nueHUHN97fO89vr4P75d0dH6/5/ec3/k++PC5Pz/39/ikqpAk9edvrHUBkqTJMOAlqVMGvCR1yoCXpE4Z8JLUKQNekjo1sYBPcnWSp5McSnLrpN5HkjRaJnEffJINwF8CvwzMAV8Bbqiqp1b9zSRJI03qCv5S4FBVfbuq/i9wL7BrQu8lSRph44TOuwV4dmh/DnjniTqfe+65tX379gmVIknrz+HDh/ne976XlZxjUgE/qqi/NheUZAaYAbjggguYnZ2dUCmStP5MT0+v+ByTmqKZA7YN7W8Fjgx3qKrdVTVdVdNTU1MTKkOSXr8mFfBfAXYkuTDJG4Drgb0Tei9J0ggTmaKpqpeTvB/4PLABuKeqnpzEe0mSRpvUHDxV9RDw0KTOL0lanCtZJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1akVf2ZfkMPBD4BXg5aqaTnIO8GlgO3AY+KdV9fzKypQknazVuIL/paraWVXTbf9W4JGq2gE80vYlSafYJKZodgF72vYe4LoJvIckaQkrDfgC/jzJE0lmWtv5VXUUoD2ft8L3kCQtw4rm4IHLq+pIkvOAh5N8c9wXth8IMwAXXHDBCsuQJB1vRVfwVXWkPR8DPgdcCjyXZDNAez52gtfurqrpqpqemppaSRmSpBGWHfBJ3pTkLQvbwK8AB4C9wI2t243AAystUpJ08lYyRXM+8LkkC+f5r1X1Z0m+AtyX5Cbgu8B7Vl6mJOlkLTvgq+rbwDtGtP8v4KqVFCVJWjlXskpSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdWjLgk9yT5FiSA0Nt5yR5OMkz7fns1p4kH01yKMn+JJdMsnhJ0omNcwX/ceDq49puBR6pqh3AI20f4BpgR3vMAHetTpmSpJO1ZMBX1V8A3z+ueRewp23vAa4bav9EDXwZ2JRk82oVK0ka33Ln4M+vqqMA7fm81r4FeHao31xre40kM0lmk8zOz88vswxJ0oms9i9ZM6KtRnWsqt1VNV1V01NTU6tchiRpuQH/3MLUS3s+1trngG1D/bYCR5ZfniRpuZYb8HuBG9v2jcADQ+3vbXfTXAa8uDCVI0k6tTYu1SHJp4ArgHOTzAG/B/w+cF+Sm4DvAu9p3R8CrgUOAT8G3jeBmiVJY1gy4KvqhhMcumpE3wJuWWlRkqSVcyWrJHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROLRnwSe5JcizJgaG225P8VZJ97XHt0LHbkhxK8nSSX51U4ZKkxY1zBf9x4OoR7XdW1c72eAggyUXA9cAvtNf85yQbVqtYSdL4lgz4qvoL4Ptjnm8XcG9VvVRV3wEOAZeuoD5J0jKtZA7+/Un2tymcs1vbFuDZoT5zre01kswkmU0yOz8/v4IyJEmjLDfg7wJ+FtgJHAX+sLVnRN8adYKq2l1
V01U1PTU1tcwyJEknsqyAr6rnquqVqvoJ8Ee8Og0zB2wb6roVOLKyEiVJy7GsgE+yeWj3N4CFO2z2AtcnOTPJhcAO4PGVlShJWo6NS3VI8ingCuDcJHPA7wFXJNnJYPrlMHAzQFU9meQ+4CngZeCWqnplMqVLkhazZMBX1Q0jmu9epP8dwB0rKUqStHKuZJWkThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdWvI2SaknT+y+eWT7L8587BRXIk2eV/CS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4PW64t0yej0x4CWpUwa8JHXKgJekThnwktQpA16SOrVkwCfZluTRJAeTPJnkA639nCQPJ3mmPZ/d2pPko0kOJdmf5JJJD0KS9FrjXMG/DHyoqt4OXAbckuQi4FbgkaraATzS9gGuAXa0xwxw16pXLUla0pIBX1VHq+qrbfuHwEFgC7AL2NO67QGua9u7gE/UwJeBTUk2r3rlkqRFndQcfJLtwMXAY8D5VXUUBj8EgPNaty3As0Mvm2ttx59rJslsktn5+fmTr1yStKixAz7Jm4HPAB+sqh8s1nVEW72moWp3VU1X1fTU1NS4ZUiSxjRWwCc5g0G4f7KqPtuan1uYemnPx1r7HLBt6OVbgSOrU64kaVzj3EUT4G7gYFV9ZOjQXuDGtn0j8MBQ+3vb3TSXAS8uTOVIp6sTfdOTtJ6N85V9lwO/BXwjyb7W9rvA7wP3JbkJ+C7wnnbsIeBa4BDwY+B9q1qxJGksSwZ8VX2J0fPqAFeN6F/ALSusS5K0Qq5klaROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgNfrzi/OfGytS5BOCQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6Nc6Xbm9L8miSg0meTPKB1n57kr9Ksq89rh16zW1JDiV5OsmvTnIAkqTRxvnS7ZeBD1XVV5O8BXgiycPt2J1V9e+GOye5CLge+AXgbwP/I8nfrapXVrNwSdLilryCr6qjVfXVtv1D4CCwZZGX7ALuraqXquo7wCHg0tUoVloto/49mid237wGlUiTc1Jz8Em2AxcDj7Wm9yfZn+SeJGe3ti3As0Mvm2PxHwiSpAkYO+CTvBn4DPDBqvoBcBfws8BO4CjwhwtdR7y8RpxvJslsktn5+fmTLlyStLixAj7JGQzC/ZNV9VmAqnquql6pqp8Af8Sr0zBzwLahl28Fjhx/zqraXVXTVTU9NTW1kjFIkkYY5y6aAHcDB6vqI0Ptm4e6/QZwoG3vBa5PcmaSC4EdwOOrV7IkaRzj3EVzOfBbwDeS7GttvwvckGQng+mXw8DNAFX1ZJL7gKcY3IFzi3fQSNKpt2TAV9WXGD2v/tAir7kDuGMFdUmSVsiVrJLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4NWdJGM9VvLaxc4hnS4MeEnq1Dhf+CF17cGjM0N7u9esDmm1eQWv17W/Hu5SXwx4vW5N3/zaq/Xbb59dg0qkyRjnS7fPSvJ4kq8neTLJh1v7hUkeS/JMkk8neUNrP7PtH2rHt092CNLq+bXNTtGoH+Ncwb8EXFlV7wB2AlcnuQz4A+DOqtoBPA/c1PrfBDxfVT8H3Nn6SaclA109G+dLtwv4Uds9oz0KuBL4Z619D3A7cBewq20D3A/8xyRp55FOK4NpmldD/vY1q0RafWPNwSfZkGQfcAx4GPgW8EJVvdy6zAFb2vYW4FmAdvxF4K2rWbQkaWljBXxVvVJVO4GtwKXA20d1a8+jVn+85uo9yUyS2SSz8/Pz49YrSRrTSd1FU1UvAF8ELgM2JVmY4tkKHGnbc8A2gHb8Z4DvjzjX7qqarqrpqamp5VUvSTqhce6imUqyqW2/EXg3cBB4FPjN1u1G4IG2vbft045/wfl3STr1xlnJuhn
Yk2QDgx8I91XVg0meAu5N8m+BrwF3t/53A/8lySEGV+7XT6BuSdISxrmLZj9w8Yj2bzOYjz++/f8A71mV6iRJy+ZKVknqlAEvSZ0y4CWpU/5zweqON21JA17BS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROjfOl22cleTzJ15M8meTDrf3jSb6TZF977GztSfLRJIeS7E9yyaQHIUl6rXH+PfiXgCur6kdJzgC+lOS/t2P/sqruP67/NcCO9ngncFd7liSdQktewdfAj9ruGe2x2Dcq7AI+0V73ZWBTks0rL1WSdDLGmoNPsiHJPuAY8HBVPdYO3dGmYe5McmZr2wI8O/TyudYmSTqFxgr4qnqlqnYCW4FLk/x94Dbg54F/CJwD/E7rnlGnOL4hyUyS2SSz8/PzyypeknRiJ3UXTVW9AHwRuLqqjrZpmJeAPwEubd3mgG1DL9sKHBlxrt1VNV1V01NTU8sqXpJ0YuPcRTOVZFPbfiPwbuCbC/PqSQJcBxxoL9kLvLfdTXMZ8GJVHZ1I9ZKkExrnLprNwJ4kGxj8QLivqh5M8oUkUwymZPYB/6L1fwi4FjgE/Bh43+qXLUlaypIBX1X7gYtHtF95gv4F3LLy0iRJK+FKVknqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTYwd8kg1JvpbkwbZ/YZLHkjyT5NNJ3tDaz2z7h9rx7ZMpXZK0mJO5gv8AcHBo/w+AO6tqB/A8cFNrvwl4vqp+Driz9ZMknWJjBXySrcA/Af647Qe4Eri/ddkDXNe2d7V92vGrWn9J0im0ccx+/x74V8Bb2v5bgReq6uW2PwdsadtbgGcBqurlJC+2/t8bPmGSGWCm7b6U5MCyRnD6O5fjxt6JXscF/Y7Nca0vfyfJTFXtXu4Jlgz4JL8GHKuqJ5JcsdA8omuNcezVhkHRu9t7zFbV9FgVrzO9jq3XcUG/Y3Nc60+SWVpOLsc4V/CXA7+e5FrgLOBvMbii35RkY7uK3wocaf3ngG3AXJKNwM8A319ugZKk5VlyDr6qbquqrVW1Hbge+EJV/XPgUeA3W7cbgQfa9t62Tzv+hap6zRW8JGmyVnIf/O8Av53kEIM59rtb+93AW1v7bwO3jnGuZf8VZB3odWy9jgv6HZvjWn9WNLZ4cS1JfXIlqyR1as0DPsnVSZ5uK1/Hmc45rSS5J8mx4ds8k5yT5OG2yvfhJGe39iT5aBvr/iSXrF3li0uyLcmjSQ4meTLJB1r7uh5bkrOSPJ7k621cH27tXazM7nXFeZLDSb6RZF+7s2TdfxYBkmxKcn+Sb7b/1961muNa04BPsgH4T8A1wEXADUkuWsualuHjwNXHtd0KPNJW+T7Cq7+HuAbY0R4zwF2nqMbleBn4UFW9HbgMuKX92az3sb0EXFlV7wB2AlcnuYx+Vmb3vOL8l6pq59Atkev9swjwH4A/q6qfB97B4M9u9cZVVWv2AN4FfH5o/zbgtrWsaZnj2A4cGNp/GtjctjcDT7ftjwE3jOp3uj8Y3CX1yz2NDfibwFeBdzJYKLOxtf/0cwl8HnhX297Y+mWtaz/BeLa2QLgSeJDBmpR1P65W42Hg3OPa1vVnkcEt5985/r/7ao5rradofrrqtRleEbuenV9VRwHa83mtfV2Ot/31/WLgMToYW5vG2AccAx4GvsWYK7OBhZXZp6OFFec/aftjrzjn9B4XDBZL/nmSJ9oqeFj/n8W3AfPAn7RptT9O8iZWcVxrHfBjrXrtyLobb5I3A58BPlhVP1is64i203JsVfVKVe1kcMV7KfD2Ud3a87oYV4ZWnA83j+i6rsY15PKquoTBNMUtSf7xIn3Xy9g2ApcAd1XVxcD/ZvHbyk9
6XGsd8AurXhcMr4hdz55LshmgPR9r7etqvEnOYBDun6yqz7bmLsYGUFUvAF9k8DuGTW3lNYxemc1pvjJ7YcX5YeBeBtM0P11x3vqsx3EBUFVH2vMx4HMMfjCv98/iHDBXVY+1/fsZBP6qjWutA/4rwI72m/43MFgpu3eNa1oNw6t5j1/l+9722/DLgBcX/ip2ukkSBovWDlbVR4YOreuxJZlKsqltvxF4N4NfbK3rldnV8YrzJG9K8paFbeBXgAOs889iVf1P4Nkkf681XQU8xWqO6zT4RcO1wF8ymAf912tdzzLq/xRwFPh/DH7C3sRgLvMR4Jn2fE7rGwZ3DX0L+AYwvdb1LzKuf8Tgr3/7gX3tce16HxvwD4CvtXEdAP5Na38b8DhwCPhvwJmt/ay2f6gdf9taj2GMMV4BPNjLuNoYvt4eTy7kxHr/LLZadwKz7fP4p8DZqzkuV7JKUqfWeopGkjQhBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ36/yZChBiAahacAAAAAElFTkSuQmCC\n", 41 | "text/plain": [ 42 | "
" 43 | ] 44 | }, 45 | "metadata": { 46 | "needs_background": "light" 47 | }, 48 | "output_type": "display_data" 49 | } 50 | ], 51 | "source": [ 52 | "import gym\n", 53 | "import numpy as np, pandas as pd\n", 54 | "import matplotlib.pyplot as plt\n", 55 | "%matplotlib inline\n", 56 | "\n", 57 | "env = gym.make(\"CartPole-v0\")\n", 58 | "\n", 59 | "#gym compatibility: unwrap TimeLimit\n", 60 | "if hasattr(env,'env'):\n", 61 | " env=env.env\n", 62 | "\n", 63 | "env.reset()\n", 64 | "n_actions = env.action_space.n\n", 65 | "state_dim = env.observation_space.shape\n", 66 | "\n", 67 | "plt.imshow(env.render(\"rgb_array\"))" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# Building the policy network" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "For REINFORCE algorithm, we'll need a model that predicts action probabilities given states.\n", 82 | "\n", 83 | "For numerical stability, please __do not include the softmax layer into your network architecture__. \n", 84 | "\n", 85 | "We'll use softmax or log-softmax where appropriate." 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 55, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "import tensorflow as tf\n", 95 | "\n", 96 | "#create input variables. 
def get_action_proba(s):
    """Return the current policy's action probabilities for a single state.

    `s` is wrapped into a batch of one, fed to the module-level `states`
    placeholder, and the only row of the evaluated `policy` tensor is
    returned. `policy.eval` is called without an explicit session, so a
    default TensorFlow session must be active when this runs.
    """
    return policy.eval({states: [s]})[0]


def get_cumulative_rewards(rewards,  # rewards at each step
                           gamma=0.99,  # discount for reward
                           ):
    """
    Turn the immediate rewards r(s,a) of one session into discounted
    cumulative rewards G(s,a):

        G_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ...

    The values are computed in a single backward pass using the recurrence
    G_t = r_t + gamma * G_{t+1}, with the return after the last step taken
    to be 0.

    :param rewards: sequence of immediate rewards (list, tuple, range, ...).
    :param gamma: discount factor.
    :returns: list of cumulative rewards with as many elements as `rewards`;
        an empty list when `rewards` is empty.
    """
    discounted = []
    running_return = 0.0
    # Walk from the last tick to the first, appending (O(1)) and flipping
    # once at the end instead of prepending to a list on every step.
    for reward in reversed(rewards):
        running_return = reward + gamma * running_return
        discounted.append(running_return)
    discounted.reverse()
    return discounted


def train_step(_states, _actions, _rewards):
    """Given a full session, train the agent with one policy-gradient step.

    The immediate rewards are converted to discounted cumulative rewards and
    fed, together with the visited states and chosen actions, to the
    module-level `update` op through the `states`, `actions` and
    `cumulative_rewards` placeholders.

    An empty session is skipped: there is nothing to learn from it, and a
    mean over zero samples would produce NaN gradients.
    """
    if len(_rewards) == 0:
        return
    _cumulative_rewards = get_cumulative_rewards(_rewards)
    update.run({states: _states, actions: _actions, cumulative_rewards: _cumulative_rewards})
def generate_session(t_max=1000):
    """Play one episode with the REINFORCE agent and train at the session end.

    At every step an action is sampled from the probabilities returned by
    `get_action_proba` and applied to the module-level `env`. The episode
    stops when the environment reports `done` or after `t_max` steps. The
    recorded states, actions and rewards are then handed to `train_step`.

    :param t_max: maximum number of environment steps to play.
    :returns: the undiscounted sum of the rewards collected in the episode.
    """
    # Session history. Deliberately not called `states` / `actions`: those
    # names belong to the module-level placeholders that train_step feeds.
    state_history, action_history, reward_history = [], [], []

    s = env.reset()

    for _step in range(t_max):
        # action probabilities array aka pi(a|s)
        action_probas = get_action_proba(s)
        a = np.random.choice(n_actions, 1, p=action_probas)[0]

        new_s, r, done, _info = env.step(a)

        # record session history to train later
        state_history.append(s)
        action_history.append(a)
        reward_history.append(r)

        s = new_s
        if done:
            break

    # A zero-step episode (t_max=0) carries no training signal; skip it.
    if reward_history:
        train_step(state_history, action_history, reward_history)

    return sum(reward_history)
354 | "name": "stderr", 355 | "output_type": "stream", 356 | "text": [ 357 | "[2017-04-08 03:29:10,315] Making new env: CartPole-v0\n", 358 | "[2017-04-08 03:29:10,324] DEPRECATION WARNING: env.spec.timestep_limit has been deprecated. Replace your call to `env.spec.timestep_limit` with `env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')`. This change was made 12/28/2016 and is included in version 0.7.0\n", 359 | "[2017-04-08 03:29:10,329] Clearing 6 monitor files from previous run (because force=True was provided)\n", 360 | "[2017-04-08 03:29:10,336] Starting new video recorder writing to /home/jheuristic/Downloads/sonnet/sonnet/examples/videos/openaigym.video.0.14221.video000000.mp4\n", 361 | "[2017-04-08 03:29:16,834] Starting new video recorder writing to /home/jheuristic/Downloads/sonnet/sonnet/examples/videos/openaigym.video.0.14221.video000001.mp4\n", 362 | "[2017-04-08 03:29:23,689] Starting new video recorder writing to /home/jheuristic/Downloads/sonnet/sonnet/examples/videos/openaigym.video.0.14221.video000008.mp4\n", 363 | "[2017-04-08 03:29:33,407] Starting new video recorder writing to /home/jheuristic/Downloads/sonnet/sonnet/examples/videos/openaigym.video.0.14221.video000027.mp4\n", 364 | "[2017-04-08 03:29:45,840] Starting new video recorder writing to /home/jheuristic/Downloads/sonnet/sonnet/examples/videos/openaigym.video.0.14221.video000064.mp4\n", 365 | "[2017-04-08 03:29:56,812] Finished writing results. 
import os

import gym.wrappers
from IPython.display import HTML

# record sessions
# Rebinding the module-level `env` makes generate_session play in the
# monitored environment. generate_session also calls train_step, so the
# policy keeps training while the videos are recorded.
env = gym.wrappers.Monitor(gym.make("CartPole-v0"), directory="videos", force=True)
try:
    sessions = [generate_session() for _ in range(100)]
finally:
    # Close the monitor even if a session fails, so the recorder is not
    # left open.
    env.close()

# show video
# os.listdir returns names in arbitrary order; sort them so that [-1] is
# the highest-numbered recording.
video_names = sorted(name for name in os.listdir("./videos/") if name.endswith(".mp4"))

# NOTE(review): the markup between the triple quotes was missing from the
# reviewed copy of this cell (the template had no "{}" placeholder left);
# restored here as a plain <video> element - confirm against the original.
HTML("""
<video width="640" height="480" controls>
  <source src="{}" type="video/mp4">
</video>
""".format("./videos/" + video_names[-1]))
Try other indices" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 67, 416 | "metadata": {}, 417 | "outputs": [ 418 | { 419 | "ename": "TypeError", 420 | "evalue": "argument of type 'NoneType' is not iterable", 421 | "output_type": "error", 422 | "traceback": [ 423 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 424 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", 425 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msubmit\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msubmit_cartpole\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0msubmit_cartpole\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mgenerate_session\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"matcha.11@samsung.com\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"Li6fIhJESRzvvx5T\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 426 | "\u001b[1;32m~\\Downloads\\Practical_RL\\week5_policy_based\\submit.py\u001b[0m in \u001b[0;36msubmit_cartpole\u001b[1;34m(generate_session, email, token)\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mgrader\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgrading\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mGrader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"oyT3Bt7yEeeQvhJmhysb5g\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mgrader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_answer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"7QKmA\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msession_rewards\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 
12\u001b[1;33m \u001b[0mgrader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msubmit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0memail\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtoken\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 427 | "\u001b[1;32m~\\Downloads\\Practical_RL\\grading.py\u001b[0m in \u001b[0;36msubmit\u001b[1;34m(self, email, token)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[0md\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 35\u001b[1;33m \u001b[1;32mif\u001b[0m \u001b[0md\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;34mu'details'\u001b[0m \u001b[1;32min\u001b[0m \u001b[0md\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;34mu'learnerMessage'\u001b[0m \u001b[1;32min\u001b[0m \u001b[0md\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34mu'details'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 36\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0md\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34mu'details'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34mu'learnerMessage'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 37\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Hint: try generating new token and make sure you spelled it correctly\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 428 | "\u001b[1;31mTypeError\u001b[0m: argument of 
import getpass
import os

from submit import submit_cartpole

# Grader credentials must never be written into the notebook: anything typed
# into a cell is saved in the file and echoed in tracebacks. Read them from
# the environment, falling back to an interactive prompt.
# NOTE(review): the token that was previously hard-coded here should be
# treated as compromised and regenerated; clear the saved cell outputs too,
# since the stored traceback repeats it.
email = os.environ.get("COURSERA_EMAIL") or input("Coursera e-mail: ")
token = os.environ.get("COURSERA_TOKEN") or getpass.getpass("Coursera assignment token: ")

submit_cartpole(generate_session, email, token)

# That's all, thank you for your attention!
# Not having enough? There's an actor-critic waiting for you in the honor section.
# But make sure you've seen the videos first.
import random


class ReplayBuffer(object):
    """Fixed-capacity FIFO memory of (s, a, r, s', done) transitions.

    Transitions live in a plain list used as a ring buffer: once the list
    holds `size` items, every new transition overwrites the oldest one in
    place. Adding is O(1) and random access for sampling stays O(1).
    """

    def __init__(self, size):
        """
        Create Replay buffer.
        Parameters
        ----------
        size: int
            Max number of transitions to store in the buffer. When the buffer
            overflows the old memories are dropped.

        Raises
        ------
        ValueError
            If `size` is not positive.
        """
        if size <= 0:
            raise ValueError("ReplayBuffer size must be positive, got %r" % (size,))
        self._storage = []
        self._maxsize = size
        # Slot that the next transition is written to; after the buffer has
        # filled up this is always the position of the oldest transition.
        self._next_idx = 0

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self._storage)

    def add(self, obs_t, action, reward, obs_tp1, done):
        """Store one transition, evicting the oldest one when at capacity.

        Note that after the first eviction `_storage` is no longer ordered
        from oldest to newest; only its contents are guaranteed.
        """
        data = (obs_t, action, reward, obs_tp1, done)

        if len(self._storage) < self._maxsize:
            self._storage.append(data)
        else:
            self._storage[self._next_idx] = data
        self._next_idx = (self._next_idx + 1) % self._maxsize

    def sample(self, batch_size):
        """Sample a batch of experiences uniformly, with replacement.
        Parameters
        ----------
        batch_size: int
            How many transitions to sample.
        Returns
        -------
        obs_batch: np.array
            batch of observations
        act_batch: np.array
            batch of actions executed given obs_batch
        rew_batch: np.array
            rewards received as results of executing act_batch
        next_obs_batch: np.array
            next set of observations seen after executing act_batch
        done_mask: np.array
            done_mask[i] = 1 if executing act_batch[i] resulted in
            the end of an episode and 0 otherwise.
        Raises
        ------
        ValueError
            If a non-empty batch is requested from an empty buffer.
        """
        n_stored = len(self._storage)
        if n_stored == 0 and batch_size > 0:
            raise ValueError("cannot sample from an empty ReplayBuffer")

        picked = [self._storage[random.randrange(n_stored)] for _ in range(batch_size)]

        # Transpose the list of transitions into one array per field.
        states, actions, rewards, next_states, is_done = (
            np.array([transition[field] for transition in picked]) for field in range(5)
        )
        return states, actions, rewards, next_states, is_done


def play_and_train_with_replay(env, agent, replay=None,
                               t_max=10**4, replay_batch_size=32):
    """
    Play one full game and train the agent along the way.

    - actions are chosen by agent.get_action(s)
    - after every step the agent is updated on the transition just observed
      via agent.update(s, a, r, next_s)
    - if `replay` is given, the transition is also stored in it, then
      `replay_batch_size` transitions are sampled from it and the agent is
      updated on each of them

    :param env: environment with reset() and step(action) -> (next_s, r, done, info).
    :param agent: object providing get_action(s) and update(s, a, r, next_s).
    :param replay: ReplayBuffer where agent can store and sample (s,a,r,s',done) tuples.
        If None, do not use experience replay
    :param t_max: maximum number of steps to play.
    :param replay_batch_size: number of replayed transitions per step.
    :returns: total (undiscounted) reward collected in the game.
    """
    total_reward = 0.0
    s = env.reset()

    for _step in range(t_max):
        # get agent to pick action given state s
        a = agent.get_action(s)

        next_s, r, done, _info = env.step(a)

        # update agent on current transition
        agent.update(s, a, r, next_s)

        if replay is not None:
            # store current transition in buffer
            replay.add(s, a, r, next_s, done)

            # replay a random batch; the done flags are not needed because
            # agent.update does not take them
            states, actions, rewards, next_states, _done_mask = replay.sample(replay_batch_size)
            for transition in zip(states, actions, rewards, next_states):
                agent.update(*transition)

        s = next_s
        total_reward += r
        if done:
            break

    return total_reward
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAD8CAYAAAB6paOMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3Xl8VPW9+P/Xe2YykxUSFqMsGlBQ\nESIgi4hL3AC9FvTWW7UuWGtRq7fbbav+vNblVuu1/ba3et1oxda6X+tC0VaEOlqsCOKC7ESIEPaw\nZF9mzvn8/jgnG05IyMxkkpz38/GYx5z5nM855z2fTOY9n7N8jhhjUEop5V2+VAeglFIqtTQRKKWU\nx2kiUEopj9NEoJRSHqeJQCmlPE4TgVJKeZwmAqWU8jhNBEop5XGaCJRSyuMCqQ6gIwYMGGAKCgo6\nvXx1dTVZWVmJC6gH07ZoTdujNW2PZr2hLVasWFFmjBnYXr0ekQgKCgr46KOPOr18OBymqKgocQH1\nYNoWrWl7tKbt0aw3tIWIfNmRerprSCmlPE4TgVJKeZwmAqWU8jhNBEop5XGaCJRSyuM0ESillMdp\nIlBKKY9L2XUEIjID+C3gB35vjHkgVbGo3iNi2c4jaojYzdPpQR9Bvw+/T0jz+wgFnN9AUdtQ22AR\ntW2ilrNM1DJELUODZdMQtWmwbCzbxnbv6hrwCQGfj4Bf8IlgMFi2we8Tgn4fIgJAy9vAGsC2DRHL\nYDDufKiqj1IbsfCJ4BfB7xN8AhHLYBkDxuAzUXx2A347gs+OIOIDgUCkmkCkAsFgDNRHozRELKK2\nwbJs/D4I+IWAuM8+HwEx+LHBWIhtISaKGAsxFlZ6P3bv2svazz9CjE0Ai4DYBMSmoaGBaDQCtgXu\ncthRrGBfTCAdjA22jWVFsCMNWNF6jBVBjOW8UQDxYcSHET/4Avh9fqL4qPD1oS5iABufADhtmx4Q\nfKZ1jGJsxNgYBL9Vhy9a67RJ0zzL3WaL17bVaj6AER+2+DD4m2Iy4kd87iMQwNq2neL6dVj11dRa\ngpGAu5zfmfb5sX1BjD+Ez65HovXOsuLD5xMsSSPiCxLw+fD7/fj9AXx+P1gRJFoHVj2+aB1i1eO3\n68mQKEGJYtsGY2yMMVi2jZV1JMPPn5PU/5uUJAIR8QOPAOcDpcByEZlvjFmTinjU4TPGUB+1qayL\nUl0fpaq++dmZtppeN1g2FbURDtRGSA/4nS/Cxi9Vv2AbiFo2Udv5Ao7ahqhtY9mGiNX4bKhpiHKg\nJkJtg0VWKIDfJ4hAXcSmLmJR534JHraFf2v1MkAUGx/pNJBLFenSQIhI06OONAAsfPSVanKpJp0G\nguLMDxIhSJSgOM+NywUl2jQvRENTnX445Y11Wq4jRASfdN19xUcCrO2yzXVrJ4Lz7ZRi6wPHQ29M\nBMAkoNgYswlARF4AZgGaCLqYZRv2VtWzq6Ke/TUNlLtf2BXuo6o+SmVdlG0HaqmojTR/6TdYWB38\n0g36fWSnB8jNSKMuYiHifIEDRC3nl7TfJ+6v1uZf243TaX4f6WlCfp8QeZlB0tP81DREnS99A6E0\nPxlpfjKCPtIDftICzjJBv5BOPZKWSbTmAIG6vYRqdxOq202odjdZDXvw799Mvww/fhMhq6GMjIa9\npEcOYBCE+L6ADT4sfxDbF8T2pTX9erSbykJIWh8kkO6+DmL50rB8QWoDIWoDIUxj3cZnSQNjYwAr\nkEk02BeDD3y4792P3+fDL4JlIGoMtg0R22DZTg/IFl/zL1oC2D4/Bh8ZFZsoLSlm6NEF2OLHwk/E\n9hFFCASCpAUCGL/za1h8fvD5Sastc+IRP4gPvz+APxjCHwiCPw3xOb+iwSDGbvqlbiwLy4oQMFGy\nGspIC4acuu4v4ahlUW/7oHH5xmfxN63P+NOxAxngDzXP8wXcn
ocTn1PuvNemckCw8eH0LnxuDwLb\nxrYiWFYUy4qy8rOVHD/qJNIzsskM+vBjIbbTExLbAjuCROsxVj3Gnw6BEMbY2JaNbdv4TQN+qx7L\nNkRtC8uysKNRp54/hAmEIJCOBEJEfUGqogFqLB8+nw+/z4fP7yPN76d/TkZcn8OOSFUiGAxsbfG6\nFJicolg8wbYNuyrrWLvXYm34Cz7beoB1Oyso3V/b5q/oYMBHdihAVsjPkNxMjh2YTVYoQE66U5YV\nCjjzgwGy093pUIDskJ/sUBpZIT+ZQeeXexLeEPh8UF8FZRtg7zqo2ulMW1HYXwL7voCqXYBArC/1\nQAY1aXlkBnIhmAX5J0J2PmQfgUTrIb0vZPaDtExw/2nxB6Gu3Hk2FqTnunWcLyQCIWdeIB3xB3rG\nGC5NzqAsHOakHj6sQqKU7d7LKSePTXUYXSJVn9NY3wyt/lNFZA4wByA/P59wONzpjVVVVcW1fE+z\nv85mzV6LkgqbLRU2VRHDrhpD1G6ssY78TGFojo/RBQH6pQt9Q0JOUMgKCFlpkJkmBP0t/0x17uMg\n9e6jEiLAAfcRF2MINuwjVL+PYMMBAtFq0ut2klNZTKi+jFD9PgLRaiJpfQg17Gu1aCSQg+0LUJtx\nFLXZY2jodwZ+q5b60EAagrnUh/rREOxHfagflj+TqupqsrOzvxpDAIgCFV8JDujTcovArnjfcbfh\ntf+VQ/FSW6QqEZQCQ1u8HgJsb1nBGDMXmAswYcIEE8/gT71h8Ki21DZYfFZ6gBVf7mfDrkrW7qhg\nw65aALKCfo49IoejsoJceEQ2x/TP4kBpMd+84Az6ZQVTHDnOQcTqPVCxHXZ8BhXbYN9m2Pye8+u+\nFYGBJ8CgEyCrP1SXEQrlwIDjYeBIGDASsvNJy+wHQAjI7UAIvfmz0RnaHs281BapSgTLgREiMgzY\nBlwOfDNFsfQolm34cNNeFny+g1XbylmzvaJp186QvAyGDcji0lOGcPpxAznhyBx8B+2WCYc3py4J\nWBGwGqB4Eaz6M2xYCNHa1nWyBkLBGTB0EvQZ7Dz8AcgrcHbVKKUSLiWJwBgTFZFbgLdwTh+dZ4xZ\nnYpYegJjDKu3V/D6p9uY/9l2dlXUkx0KcPLQvsw5czgTCvIYNzSPvO7wK/9gW5fB0kdhy4dQ2aLT\nlzUQCr8B/YZBv2OdX/vZA5197pKEYwpKqTal7FiWMeZN4M1Ubb8n2FfdwB/+WcIbK7fzxZ5q0vzC\nWSOP4M6LBnHeifmkp/lTHWJsNftg7V/gk2egdBmE+sLRk2H0vzq/6odMdH71+3vWoVSleiv9T+yG\nahssHn/3C55cspnqhihThvfnutOHceHoo7rnr/5GVhSW/x7euR/qy6HfcLjgQRh7JYRiHJBVSnUL\nmgi6mXc37OHO11axZV8NF4w+kh+dP5IR+TmpDqt9X/4T3vwJ7FoFw8+G8+6Co8bqbh6legBNBN3E\n3qp67vnLGuZ/tp3hA7N4/junMuXY/qkOq32VO+Htn8HKF6HvUPjGn+DEr2kCUKoH0UTQDSzbvI9/\nf/5j9ldH+MF5I7ip6FhCgW66/79RtAGWzYXwA2DVwxk/hjP+A4KZqY5MKXWYNBGkUMSyeXjxRh4J\nf8HQvAyeunkSowb1aX/BVDIGPn0O/na7cxzguPPhgv+G/semOjKlVCdpIkiR7Qdq+f4Ln7C8ZD+X\njBvMPbNOok96WqrDiq12P6z4gzucw3rnjKChp0LRrc7xAN0NpFSPpokgBZZt3sf1f1xOg2Xz28vH\nMmvs4FSH1LZNYXj1Rqjc0Vx29n/CGT8CXzfffaWU6hBNBF2opiHKrxdu4OmlXzIkL4N5sydSMCAr\n1WHFZlvO/v/3fukM3/C1h6ChEnILYMgpqY5OKZVAmgi6SHV9lCt//yGflR7g4rGDufOiUd1jvJ9Y\nbBtevwU+ew7GXeVcCxDsp
glLKRU3TQRdwLIN33/hU1aWHuCRb47nwjFHpTqktm1+Dxb8CPZudHYB\nnfWTVEeklEoyTQRd4IG/rmXR2l3cM/Ok7psEvvg7fPIsrHrZeX32HZoElPIITQRJ9vyyLfzuH5uZ\nPeUYZp9WkOpwvqqhBtbOh1dvcF5PucVJAno9gFKeoYkgid5Zv5s7X1tF0fEDufOiUakO56ve/y28\nfRdgnEHg/u2Pzlj/SilP0USQJOt3VnLD0ys47ohsHr5iHAG/L9Uhtbby/5wkMHg8jJoFp3wL0rv5\nxWxKqaTQRJAEUcvmP/7vU3LSAzxz/WRyutuFYhsXwSvXw6BxcO0bzv12lVKepYkgCea9v5lV2yp4\n9MrxDMgOpTqcVnL3r4TlD8PAE2H2Ak0CSim62f6Knm/rvhp+8/ZGzjsxnwtGH5nqcFp790HGfnYn\npKXDvz2l9whQSgHaI0i4n7+xBp/AvbNOQrrTGDxbl0H4F9gSwHftG5B7dKojUkp1E9ojSKDPth7g\nrdW7uOGsYxmU2012uZSugN+dC09dCH2H8v7UpzUJKKVa0R5BAv1q4Xr6ZQW57vRhqQ7FUbEdnpoB\nVgPkDIJvPI214UCqo1JKdTPaI0iQDzft5R8by7jprGPJDnWT/LrgRyB+uOQJ+MHnMGhsqiNSSnVD\nmggSwBjD/1u4gSNyQlw95ZhUh+MMGveX78OGv8LZt8PJl4O/myQnpVS3o4kgAd7bWMaykn38+znH\nkZ7WDcbof+1G50YywWyYfFOqo1FKdXOaCOLk9AbWMyQvg8smdoODsOv/5txIPr0v3PRPCHTToa6V\nUt1GXIlARP5NRFaLiC0iEw6ad7uIFIvIehGZ3qJ8hltWLCK3xbP97iC8YQ8rS8v53jkjCAZSnFej\n9fC325wbyfy4GPK6wW4qpVS3F++O41XAvwJPtCwUkVHA5cBJwCBgkYiMdGc/ApwPlALLRWS+MWZN\nnHGkzBPvfsHg3AwuGZ/C203aFlgR+ORPsH8zXPln7QkopTosrkRgjFkLxLpwahbwgjGmHtgsIsXA\nJHdesTFmk7vcC27dHpkIindXsXTTPm6dcQJpqRxUbv734NNnnOmhk+G4c1MXi1Kqx0nWqSSDgaUt\nXpe6ZQBbDyqfnKQYku65D7eQ5hf+bcKQ1ARQVw5/ugS2rWguO/On0J2uaFZKdXvtJgIRWQTEGjTn\nDmPM620tFqPMEPuYhGlju3OAOQD5+fmEw+H2Qm1TVVVVXMvH0mAZXlxWw7iBflZ99EFC191RY1be\nS/99K4gEctiVX4TPbmBDqR+2hdtcJhlt0ZNpe7Sm7dHMS23RbiIwxpzXifWWAkNbvB4CbHen2yo/\neLtzgbkAEyZMMEVFRZ0IwxEOh4ln+Vj+tmoH1ZGPufnCUzhr5MCErrtDdq+F8Ao4/kLSLn+OIW4v\nYFA7iyWjLXoybY/WtD2aeaktkrVjez5wuYiERGQYMAJYBiwHRojIMBEJ4hxQnp+kGJLq9U+3MyA7\nyNRjU3RHr38+DIEMmPm/uitIKRWXeE8fvURESoEpwBsi8haAMWY18BLOQeC/ATcbYyxjTBS4BXgL\nWAu85NbtUSrrIixet5t/GXNUau48Vr4NVr4E46/RW0sqpeIW71lDrwKvtjHvPuC+GOVvAm/Gs91U\ne3vNLhqiNjPHpuCUUduGxfeAsWHKzV2/faVUr6NXFnfCorW7yO8TYvzRuV2/8bfvdK4cPv2HesGY\nUiohdCSywxSxbP6xoYx/KTyqa288s/1TmHuWM933aDjnP7tu20qpXk17BIfpHxv3UFkf5ewTjui6\njdoWPHtp8+trXtMDxEqphNEewWH69dsbyEkPcPpxA7puo6tegeo9kFcAY6+C/sd23baVUr2eJoLD\nsKO8llXbKvjpjOPJ6qqbz9RXOgPJDRoH1y8GXzcY5lop1atoIjgM767fA8C5J+R3zQatKLx
1B9SU\nwTdf1CSglEoKTQSH4Z9f7OWInBAj87OTv7HaA/C/E6F6Nww8AYZMaH8ZpZTqBD1Y3EHGGJZu2sup\nw/t3zdlCC+9wkgDAv/4u+dtTSnmW9gg6aHNZNbsr6zl1eJKv5I3UwfOXwaYwTPwOzPgF+NOSu02l\nlKdpj6CDlm7aB8CUZI8t9NE8JwkAnPkTTQJKqaTTHkEHvfn5DvL7hCjon5mcDUTqnLODVjzlvL74\nMcjpooPSSilP0x5BB6zeXs6S4jLOGjkweccHihc1J4ELfgljv5mc7Sil1EE0EXTA8s3ObqHvnTsi\neRvZ8Nfm6ZHTkrcdpZQ6iO4a6oCPtxwgv0+IwbkZydmAbcH6v8Gx58Kp33WuIFZKqS6iPYIO+HjL\nfsYfnZe83UIlS5yLxsZdCSM6c0M4pZTqPE0E7dhdWUfp/lpOOSYveRv5+GlI7wvHX5i8bSilVBs0\nEbTj4y8PADDu6CQlgpp9sHY+FF4GaUna9aSUUoegiaAdn2zZT9DvY/TgPsnZwMqXwGpwbjuplFIp\noImgHR9v2c9Jg/sQCiRhwDdj4OM/wqDxcOSYxK9fKaU6QBPBIdi2YfX2CgoH903OBko/gt1rtDeg\nlEopTQSHULq/lpoGixOOSsJuISsKr3wHgjkw+uuJX79SSnWQXkdwCOt2VgBw/JE5iV/56zfD/s3O\nUBLpSTr+oJRSHaA9gkPYsKsSgJH5CU4E+zbB5y/BuKt1KAmlVMppIjiETXuqGdQ3nexE3payZh88\nOQ3ED+f8Z+LWq5RSnRRXIhCRX4rIOhFZKSKvikhui3m3i0ixiKwXkektyme4ZcUicls820+2zXur\nKRiQldiVLn3UuRH9mT+GnCMTu26llOqEeHsEbwOjjTGFwAbgdgARGQVcDpwEzAAeFRG/iPiBR4AL\ngFHAFW7dbqmkLAmJYNO7MHQyFHXrHKiU8pC4EoExZqExJuq+XAoMcadnAS8YY+qNMZuBYmCS+yg2\nxmwyxjQAL7h1u53ymgj7ayKJvf9ApBa2fwJHT0ncOpVSKk6JPEZwHdA4lvJgYGuLeaVuWVvl3c7m\nvdUAFPRPYI+geBHYETjmtMStUyml4tTuUVARWQTE2pl9hzHmdbfOHUAUeLZxsRj1DbETj2lju3OA\nOQD5+fmEw+H2Qm1TVVXVYS//z+1OR2fP5jWE96zr9LZbGrX6MfoG+7G0NIDZfnjxJEpn2qI30/Zo\nTdujmZfaot1EYIw55LjIIjIbuAg41xjT+KVeCgxtUW0IsN2dbqv84O3OBeYCTJgwwRQVFbUXapvC\n4TCHu/wnb29AZCNfn34W6WkJGF5ix2cQfh9GX8pZ55wb//o6qTNt0Ztpe7Sm7dHMS20R71lDM4Bb\ngZnGmJoWs+YDl4tISESGASOAZcByYISIDBORIM4B5fnxxJAsJXurGdQ3IzFJoGI7PHGmMz1oXPzr\nU0qpBIr3BPn/BULA2+5NW5YaY240xqwWkZeANTi7jG42xlgAInIL8BbgB+YZY1bHGUNSlJRVMyxR\nZwx9+Ljz7A/BiRclZp1KKZUgcSUCY8xxh5h3H3BfjPI3gTfj2W6yGWPYXFbNzLGD4lvR/hL4w9eg\nfAscfRpc99d2F1FKqa6mVxbHcKAmQkVdNP4zhsIPOEkgfzRc8XxiglNKqQTTQediaDx1NK5dQ9V7\n4bPnYchEuPZNCAQTFJ1SSiWW9ghiKClzEsEx8fQI3nvQeT7lWk0CSqluTRNBDCVl1fgEju7XyauK\no/Ww4g8w9koYd1VCY1NKqUTTRBDD5r01DM7LIBjoZPPs/ByidTByRmIDU0qpJNBEEENJWXV8B4pL\nlzvPQyYmJiCllEoiTQQxbN1f0/ndQuAkgj6Doc9RiQtKKaWSRBPBQeoiFgdqIgzKzejcCqwIFC+G\nIRMSG5hSSiWJJoKD7CyvAyC/T3rnVrDqFag7oMcHlFI
9hiaCg+yscBLBkZ1JBDs/h1fnONOFlyUw\nKqWUSh5NBAfZ1ZgI+oYOf+Gl7phC464CXwIGq1NKqS6gieAgnd41tGs1fPqMkwRmPZKEyJRSKjk0\nERxkZ0UdWUE/Oelph7fg6tec5zN/mviglFIqiTQRHGRXRR35fTtxfGDHpzDwRMg7JvFBKaVUEmki\nOMjO8rrDP1BsjHNTer3pjFKqB9JEcJCd5XUcebg9guJFUL0HCqYmJyillEoiTQQt2LZhd2X94fcI\nihdBIAPGfCM5gSmlVBJpImihrLqeqG0Or0dQX+ncd6Bgqg43rZTqkTQRtLCrvB44zFNHN74NdeVw\n+g+TFJVSSiWXJoIWDvuq4s9egL/8ALIGwtFTkhiZUkolj96qsoWmRNDRXUPv/Qrqy2HaQ3olsVKq\nx9IeQQsffFGG3ycMyO7A8BIlS2DvRpj6AzhldvKDU0qpJNFE0MI/NpRxZJ90/D5pv/L87znPw85I\nblBKKZVkmghctQ0WlfVRvjn56PYr1+yDfV/A2XfAceclPzillEqiuBKBiPyXiKwUkU9FZKGIDHLL\nRUQeEpFid/74FsvMFpGN7qPb7FMpq3LOGBqY04HdQo23ojxGLyBTSvV88fYIfmmMKTTGjAUWAD9z\nyy8ARriPOcBjACLSD7gLmAxMAu4Skbw4Y0iI3ZWHkQh2rnSeB41NYkRKKdU14koExpiKFi+zAONO\nzwKeNo6lQK6IHAVMB942xuwzxuwH3ga6xa289lU3ANA/qwMXhR3Y4pwyGozjBvdKKdVNxH36qIjc\nB1wDlANnu8WDga0tqpW6ZW2Vp9x+NxHkZXYgEez/EnI7cCxBKaV6gHYTgYgsAo6MMesOY8zrxpg7\ngDtE5HbgFpxdP7FOuzGHKI+13Tk4u5XIz88nHA63F2qbqqqq2l3+o01OIlj98Yd8EWj7rCGxLaZu\n+YjdR5zGhjhiSpWOtIWXaHu0pu3RzEtt0W4iMMZ09LSY54A3cBJBKTC0xbwhwHa3vOig8nAb250L\nzAWYMGGCKSoqilWtQ8LhMO0t/0HtWoJflDD93CJEDnH66J718F41g069lEEndz6mVOlIW3iJtkdr\n2h7NvNQW8Z41NKLFy5nAOnd6PnCNe/bQqUC5MWYH8BYwTUTy3IPE09yylNtf3UBeVtqhkwDA3mLn\necCIQ9dTSqkeIt5jBA+IyPGADXwJ3OiWvwlcCBQDNcC3AIwx+0TkvwD3/EvuNcbsizOGuBhj+HjL\nAfbXRDp2fKAxEfQ7NrmBKaVUF4krERhjvt5GuQFubmPePGBePNtNpGc/3MJ/vrYKgCnD+7e/wN4v\nIHMAZOQmOTKllOoanr+y+Is9VU3T/do7ddS2YctS6K+9AaVU7+H5RBD0NzdBbmbaoSvvWgVl62HU\nrCRHpZRSXcfziSDgbz443O4xgrINzvPwoqTFo5RSXc3ziaAuYjdNZwTbuafAnvUgPuh/XJKjUkqp\nruP5RFBeG2mabu/MUcrWQ14BBDowHpFSSvUQnk8EFS0Sgf9QmaB6L2x4C44s7IKolFKq62giqIuQ\nnuY0w7knHtF2xS/fh2gdTJrTRZEppVTX8Pw9i8tro5x+3AB+P3vioSuufhWCOTBkQtcEppRSXUR7\nBLUR+mS0c9ooODejOX6GHh9QSvU6nk4EW/bWsO1ALX3S20kExkDVLugzqGsCU0qpLuTpRPBZ6QEA\nRubnHLpi7X6wGiA71mjcSinVs3k6EVTVRwE454RDHCQGKC91nvscleSIlFKq63k6EVTWOaeOZqe3\nc8x8z3rnecDxSY5IKaW6nqcTQVVdFBHITDvEFcWfvwzv/xb8Ib2iWCnVK3n69NHK+ijZwQA+XxsX\nklVshz9/25kuvAwCHbhfgVJK9TCeTARrd1RwwW//AbQz4uiT05unz7snyVEppVRqeHLX0OPvftE0\nfaAmErtSzT4o3+J
M37xMDxQrpXotTyaCzGAHOkKb322ezhqYvGCUUirFPJkIskPNB4fz+7RxpfCm\ncPN0Rl5yA1JKqRTyZCLIaNEj+L8bTotdqeR9GHYWfP+zDoxPrZRSPZcnE0HUcm5Gs+yOczm6f2bs\nShXbIX+0c/8BpZTqxTyZCGojFjmhAEfkpMeuEKmFSDVk9e/awJRSKgU8mQjqIhbph7otZXWZ85w5\noGsCUkqpFPJkIqhtsMg41NXEa//iPGdpIlBK9X4JSQQi8mMRMSIywH0tIvKQiBSLyEoRGd+i7mwR\n2eg+Zidi+4erusEis60egTHw0TzoOxQKzujawJRSKgXivrJYRIYC5wNbWhRfAIxwH5OBx4DJItIP\nuAuYABhghYjMN8bsjzeOw1FZF2n7HgRblsLejTDrEUjv05VhKaVUSiSiR/Ab4Kc4X+yNZgFPG8dS\nIFdEjgKmA28bY/a5X/5vAzMSEEOHvfbJNjbuqiKnrRFHV78KaZkw6uKuDEsppVImrh6BiMwEthlj\nPpPW59oPBra2eF3qlrVV3iXqIhY/ePFTAOqjduxKO1fCkYUQyu6qsJRSKqXaTQQisgiIdWuuO4D/\nD5gWa7EYZeYQ5bG2OweYA5Cfn084HG4v1DZVVVURDoepjjRvakVJWcx1nrZjNWUDprAhju11Z41t\noRzaHq1pezTzUlu0mwiMMefFKheRMcAwoLE3MAT4WEQm4fzSH9qi+hBgu1tedFB5uI3tzgXmAkyY\nMMEUFRXFqtYh4XCYoqIi9lTWw+JFAOT3zeQr67QiEK5g0PHjGRTH9rqzxrZQDm2P1rQ9mnmpLTp9\njMAY87kx5ghjTIExpgDnS368MWYnMB+4xj176FSg3BizA3gLmCYieSKSh9ObeCv+t9ExDVbz7qAn\nr5341QqN1w/oIHNKKQ9J1v0I3gQuBIqBGuBbAMaYfSLyX8Byt969xph9SYrhKxpaHBc4dmCMYwBV\nu5zn7HbuYayUUr1IwhKB2ytonDbAzW3UmwfMS9R2D0djIrj2tILYFar3OM/Z+V0TkFJKdQOeurK4\neHcVAFOPi3HFcHUZPHupM627hpRSHuKpRHDzcx8DEAzEeNvLn2ye1l1DSikP8VQiaBT0x3jbNWUt\nKmR1XTBKKZVi3kwEgRiXM+z83Hn+l193bTBKKZVinkwEAV+Mt71rDUz4Nkz8dtcHpJRSKeTJRBCx\nDhpeIlIL9eXQZ1BqAlJKqRTyZCL4ysHiLR84z3q2kFLKg5J1QVm31DjiaOGQ3NYz/nSJ8xzK6eKI\nlFIq9TyVCHwiXDz2ELt/Bp7QdcEopVQ34aldQ/VRi9DBt6iM1jvPJ86E/FFdH5RSSqWYZxKBMYb6\nqE3o4OMDP3cvHhswsuuDUkpHHkuZAAASM0lEQVSpbsAzu4YilsEYSG/sEdgWlPyjucLZd6QmMKWU\nSjHP9Agah6BO87sXk334BDw9y5keeyXEurZAKaU8wDPffpbl3J2s6WKyfZuaZx57TgoiUkqp7sEz\niSBiOz2CQGOPIC09hdEopVT34ZlEED24R1C1p3lmet8URKSUUt2DdxJBY4/A5/YIqnbCoHFwxQtw\nXMzbMiullCd4JxE09ggadw1V7oQ+g+H4C0BijEaqlFIe4Z1EYDcmAvctV+6EnCNTGJFSSnUPHkoE\nLXYNHdgKdQcgb1iKo1JKqdTzTCJ4b4NzcDjgE9j6oVM4/KwURqSUUt2DJ64sro4Y7l+8DoA0vw8q\ndjsz+gxOYVRKKdU9eKJH4B4nBsDvE9gUdl6k58asr5RSXuKJHoFlG870fUYm9QRNIWx8y5mhw0oo\npVR8PQIRuVtEtonIp+7jwhbzbheRYhFZLyLTW5TPcMuKReS2eLbfUZaBp4P/zePB/yG76suu2KRS\nSvUYiegR/MYY86uWBSIyCrgcOAkYBCwSkcZxnh8BzgdKgeUiMt8YsyYBcbSp5S2Ks
yrdMYaufSOZ\nm1RKqR4jWbuGZgEvGGPqgc0iUgxMcucVG2M2AYjIC27dpCaCaItjBJnlxc5E7jHJ3KRSSvUYidhJ\nfouIrBSReSKS55YNBra2qFPqlrVVnlSW3ZwJQlXbnIm0zGRvVimleoR2ewQisgiIdQnuHcBjwH8B\nxn3+f8B1QKwxGwyxE4+JUYaIzAHmAOTn5xMOh9sLtU2V1bVN0/U71gLw3tKPsP2hTq+zp6qqqoqr\nLXsbbY/WtD2aeakt2k0ExpgOjcgmIr8DFrgvS4GhLWYPAba7022VH7zducBcgAkTJpiioqKOhBHT\nxlcXU2NCZEo9eVIBCGeeM82TYwyFw2HiacveRtujNW2PZl5qi7iOEYjIUcaYHe7LS4BV7vR84DkR\n+TXOweIRwDKcnsIIERkGbMM5oPzNeGLoCMtABZnO6aOVWyEty5NJQKmuFIlEKC0tpa6uLtWhdErf\nvn1Zu3ZtqsPokPT0dIYMGUJaWlqnlo/3YPGDIjIWZ/dOCXADgDFmtYi8hHMQOArcbIyxAETkFuAt\nwA/MM8asjjOGdkVtqDGh5h1WQT0+oFSylZaWkpOTQ0FBAdIDf3hVVlaSk5OT6jDaZYxh7969lJaW\nMmxY58ZPiysRGGOuPsS8+4D7YpS/CbwZz3YPl2UMAazmgrSMrty8Up5UV1fXY5NATyIi9O/fnz17\n9rRfuQ2euLTWssEvLS4mSMtKXTBKeYgmga4Rbzt7IhFEDaS17BEYu+3KSinVhQoKCigrK0tpDJ5I\nBJYNfiwa8o5zCuorUhuQUqrXiUajqQ6h07yRCIwhDYvIgJOcguwjUhuQUqpLPPPMM0yaNImxY8dy\nww03YFkWX375JSNGjKCsrAzbtjnjjDNYuHAhJSUlnHDCCcyePZvCwkKuvvpqampqDrn+u+++mzlz\n5jBt2jSuueYaLMviJz/5CRMnTqSwsJAnnngCcE5FPfPMM7nkkksYNWoUN954I7b91T0TF198Maec\ncgonnXQSc+fOBeDJJ5/khz/8YVOd3/3ud/zoRz9KYCt5ZPTRiOX0COgzCGY9AsP0hjRKdaV7/rKa\nNdsT2xMfNagPd33tpDbnr127lhdffJH333+ftLQ0vvvd7/Lss89yzTXXcOutt3LjjTcyefJkRo0a\nxbRp0ygpKWH9+vU8+eSTTJ06lauvvppHH32UH//4x4eMY8WKFSxZsoSMjAzmzp1L3759Wb58OfX1\n9UydOpVp06YBsGzZMtasWcMxxxzDjBkzeOWVV7j00ktbrWvevHn069eP2tpaJk6cyNe//nUuv/xy\nCgsLefDBB0lLS+Opp55qSjCJ4okeQXXEOWsoFAzCuKsgd2j7CymlerTFixezYsUKJk6cyNixY1m8\neDGbNjmDTl5//fVUVlby+OOP86tfNY+ZOXToUKZOnQrAZZddxpIlS9rdzsyZM8nIcM5EXLhwIU8/\n/TRjx45l8uTJ7N27l40bNwIwadIkhg8fjt/v54orroi57oceeoiTTz6ZU089la1bt7Jx40aysrI4\n55xzWLBgAevWrSMSiTBmzJi426clT/QIKhucROBPC6Y6FKU86VC/3JPFGMPs2bP5xS9+8ZV5NTU1\nlJaWAs5QEo3XCxx89k1HzsbJymo+C9EYw8MPP8z06dNb1QmHw+2uOxwOs2jRIj744AMyMzMpKipq\nuhjv+uuv5/777+eEE07gW9/6VrsxHS5P9AgqGyz8YsDnibynlALOPfdcXn75ZXbvdm5Nu2/fPr78\n0rkfya233sqVV17Jvffey3e+852mZbZs2cIHH3wAwMsvv8zpp59+WNucPn06jz32GJFIBIANGzZQ\nXV0NOLuGNm/ejG3bvPjii19Zd3l5OXl5eWRmZrJu3TqWLl3aNG/y5Mls3bqV5557jiuuuOIwW6J9\nnkgEtQ3uQRlNBEp5xqhRo/j5z3/OtGnTKCws5
Pzzz2fHjh28++67LF++vCkZBINBnnrqKQBOPPFE\n/vjHP1JYWMj+/fu56aabAPjZz37G/Pnz293m9ddfz6hRoxg/fjyjR4/mhhtuaDqbaMqUKdx2222M\nHj2aYcOGcckll7RadsaMGUSjUQoLC7nzzjs59dRTW83/xje+wdSpU8nLyyPRPPHNWO9mZ00ESnnL\nZZddxmWXXfaV8pa/tl955RUASkpK8Pl8PP7444AzxERmpjMczb333htz/XfffXer1z6fj/vvv5/7\n77//K3UzMzN58cUXv1JeUlLSNP3Xv/61zfeyZMmSVmcPJVLv7xHUVTAkWuJMp/dNaShKKXW4Dhw4\nwMiRI8nIyODcc89NyjZ6/0/k/y5grnGvKj727NTGopTqtgoKCli1alX7FTuhqKio00Na5+bmsmHD\nhsQGdJDe3yMwLYaWCPVJXRxKKdVN9e5EYA66+VkgPTVxKKVUN9a7E0H1QQM5Bbx3a0qllGpP704E\noRwqR/xr82ufP3WxKKVUN9W7E0FaOrsKvpbqKJRSKVJSUsLo0aOTsu5wOMxFF10EwPz583nggQeS\nsp2u0OvPGjrgS/zFF0op1dLMmTOZOXNmqsPotN7dIwD2SW6qQ1BKpVA0Gm0aWvrSSy+lpqaGe++9\nl4kTJzJ69GjmzJmDcU8seeihhxg1ahSFhYVce+21AFRXV3PdddcxceJExo0bx+uvv/6VbfzhD3/g\nlltuAeDaa6/le9/7HqeddhrDhw/n5Zdfbqr3y1/+smmI6rvuuiv5b76Den2PYI/d/W8+rVSv99fb\nYOfniV3nkWPggvZ3x7QcWvq6667j0Ucf5ZZbbuFnP/sZAFdffTULFizga1/7Gg888ACbN28mFAqx\ndetWAO677z7OOecc5s2bx4EDB5g0aRLnnXfeIbe5Y8cOlixZwrp165g5cyaXXnopCxcuZOPGjSxb\ntgxjDDNnzuS9997jzDPPjL8t4tTrewTlDamOQCmVSi2Hlr7qqqtYsmQJ77zzDpMnT2bMmDH8/e9/\nZ/Xq1QAUFhZy5ZVX8swzzxAIOL+TFy5cyAMPPMDYsWObRgTdsmXLIbd58cUX4/P5GDVqFLt27Wpa\nz8KFCxk3bhzjx49n3bp1TUNUp1qv7xFU1Pbc28cp1Wt04Jd7ssQa/vm73/0uH330EUOHDuXuu+9u\nGu75jTfe4L333mP+/Pncc889rF27FmMMf/7znzn++ONbrafxCz6WUKj5VPXG3U7GGG6//XZuuOGG\nRL21hOnVPYLaBouXV2wlanr121RKHULLoaWff/75puGfBwwYQFVVVdM+fNu22bp1K2effTYPPvgg\n5eXlVFVVMX36dB5++OGmL/RPPvmkU3FMnz6defPmUVVVBcC2bduahshOtV7dI6iqj1JW1cDJ/I7V\nd09LdThKqRRoHFr6hhtuYMSIEdx0003s37+fMWPGUFBQwMSJEwGwLIurrrqK8vJyjDHcfPPN5Obm\ncuedd/KDH/yAwsJCjDEUFBSwYMGCw45j2rRprF27lilTpgCQnZ3NM888wxFHdIN7qBtj4noA/w6s\nB1YDD7Yovx0odudNb1E+wy0rBm7ryDZOOeUU01nH3LrAHHPrgk4v39u88847qQ6hW9H2aC2R7bFm\nzZqErSsVKioqUh3CYYnV3sBHpgPfsXH1CETkbGAWUGiMqReRI9zyUcDlwEnAIGCRiIx0F3sEOB8o\nBZaLyHxjzJp44jiUx64czyr3QJBSSqmvinfX0E3AA8aYegBjTOMOr1nAC275ZhEpBia584qNMZsA\nROQFt27SEsEFY44iY+/6ZK1eKaV6vHiPoo4EzhCRD0XkXRGZ6JYPBra2qFfqlrVVrpRSKkXa7RGI\nyCLgyBiz7nCXzwNOBSYCL4nIcEBi1DfETjwmRhkiMgeYA5Cfn084HG4v1DZVVVXFtXxvom3RmrZH\na4lsj759+
1JRUfGV0zd7CsuyqKysTHUYHWKMoa6urtN/u3YTgTGmzUvoROQm4BX3oMQyEbGBATi/\n9Ie2qDoE2O5Ot1V+8HbnAnMBJkyYYDp7dx9wBoeKZ/neRNuiNW2P1hLZHps3b6ahoYH+/fv3yGRQ\nWVlJTk73H5nAGMPevXvJzc1l3LhxnVpHvMcIXgPOAcLuweAgUAbMB54TkV/jHCweASzD6SmMEJFh\nwDacA8rfjDMGpVQ3NGTIEEpLS9mzZ0+qQ+mUuro60tN7xs2s0tPTGTJkSKeXjzcRzAPmicgqoAGY\n7fYOVovISzgHgaPAzcY494wUkVuAtwA/MM8Yo6f0KNULpaWlMWzYsFSH0WnhcLjTv7B7mrgSgTGm\nAbiqjXn3AffFKH8TeDOe7SqllEocHXtBKaU8ThOBUkp5nBgT8+zNbkVE9gBfxrGKATgHsZW2xcG0\nPVrT9mjWG9riGGPMwPYq9YhEEC8R+cgYMyHVcXQH2hataXu0pu3RzEttobuGlFLK4zQRKKWUx3kl\nEcxNdQDdiLZFa9oerWl7NPNMW3jiGIFSSqm2eaVHoJRSqg29OhGIyAwRWS8ixSJyW6rj6QoiMlRE\n3hGRtSKyWkS+75b3E5G3RWSj+5znlouIPOS20UoRGZ/ad5B4IuIXkU9EZIH7epg7dPpGEXlRRIJu\nech9XezOL0hl3MkgIrki8rKIrHM/I1O8+tkQkR+6/yOrROR5EUn36mej1yYCEfHj3A3tAmAUcIV7\n57TeLgr8hzHmRJzhwW923/dtwGJjzAhgsfsanPYZ4T7mAI91fchJ931gbYvX/w38xm2L/cC33fJv\nA/uNMccBv3Hr9Ta/Bf5mjDkBOBmnXTz32RCRwcD3gAnGmNE4Y59djlc/Gx25n2VPfABTgLdavL4d\nuD3VcaWgHV7HuTXoeuAot+woYL07/QRwRYv6TfV6wwNnqPPFOKPkLsAZAbcMCBz8OcEZDHGKOx1w\n60mq30MC26IPsPng9+TFzwbNN8nq5/6tFwDTvfrZ6LU9AvRuaLjd13HAh0C+MWYHgPt8hFutt7fT\n/wA/BWz3dX/ggDEm6r5u+X6b2sKdX+7W7y2GA3uAp9xdZb8XkSw8+NkwxmwDfgVsAXbg/K1X4NHP\nRm9OBG3dJc0TRCQb+DPwA2NMxaGqxijrFe0kIhcBu40xK1oWx6hqOjCvNwgA44HHjDHjgGqadwPF\n0mvbwz0OMgsYhnPPlCycXWEH88RnozcngkPdJa1XE5E0nCTwrDHmFbd4l4gc5c4/CtjtlvfmdpoK\nzBSREuAFnN1D/wPkikjjEOwt329TW7jz+wL7ujLgJCsFSo0xH7qvX8ZJDF78bJwHbDbG7DHGRIBX\ngNPw6GejNyeC5bh3Q3OP/F+Oc+e0Xk2cewI+Caw1xvy6xaz5wGx3ejbOsYPG8mvcM0ROBcobdxP0\ndMaY240xQ4wxBTh//78bY64E3gEudasd3BaNbXSpW7/X/OozxuwEtorI8W7RuTg3j/LcZwNnl9Cp\nIpLp/s80toUnPxspP0iRzAdwIbAB+AK4I9XxdNF7Ph2ny7oS+NR9XIizP3MxsNF97ufWF5yzq74A\nPsc5iyLl7yMJ7VIELHCnh+PcOrUY+D8g5Janu6+L3fnDUx13EtphLPCR+/l4Dcjz6mcDuAdYB6wC\n/gSEvPrZ0CuLlVLK43rzriGllFIdoIlAKaU8ThOBUkp5nCYCpZTyOE0ESinlcZoIlFLK4zQRKKWU\nx2kiUEopj/v/AbC/pe5xyberAAAAAElFTkSuQmCC\n", 275 | "text/plain": [ 276 | "
" 277 | ] 278 | }, 279 | "metadata": {}, 280 | "output_type": "display_data" 281 | } 282 | ], 283 | "source": [ 284 | "from IPython.display import clear_output\n", 285 | "from pandas import DataFrame\n", 286 | "moving_average = lambda x, span=100: DataFrame({'x':np.asarray(x)}).x.ewm(span=span).mean().values\n", 287 | "\n", 288 | "rewards_replay, rewards_baseline = [], []\n", 289 | "\n", 290 | "for i in range(1000):\n", 291 | " rewards_replay.append(play_and_train_with_replay(env, agent_replay, replay))\n", 292 | " rewards_baseline.append(play_and_train_with_replay(env, agent_baseline, replay=None))\n", 293 | " \n", 294 | " agent_replay.epsilon *= 0.99\n", 295 | " agent_baseline.epsilon *= 0.99\n", 296 | " \n", 297 | " if i %100 ==0:\n", 298 | " clear_output(True)\n", 299 | " print('Baseline : eps =', agent_replay.epsilon, 'mean reward =', np.mean(rewards_baseline[-10:]))\n", 300 | " print('ExpReplay: eps =', agent_baseline.epsilon, 'mean reward =', np.mean(rewards_replay[-10:]))\n", 301 | " plt.plot(moving_average(rewards_replay), label='exp. replay')\n", 302 | " plt.plot(moving_average(rewards_baseline), label='baseline')\n", 303 | " plt.grid()\n", 304 | " plt.legend()\n", 305 | " plt.show()\n", 306 | " " 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "### Submit to Coursera" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 22, 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "Submitted to Coursera platform. 
See results on assignment page!\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "# Credentials are read from the environment so they are never committed with the notebook.\nimport os\nfrom submit import submit_experience_replay\n", 331 | "submit_experience_replay(rewards_replay, rewards_baseline, os.environ[\"COURSERA_EMAIL\"], os.environ[\"COURSERA_TOKEN\"])" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "source": [ 340 | "#### What to expect:\n", 341 | "\n", 342 | "Experience replay, if implemented correctly, will improve algorithm's initial convergence a lot, but it shouldn't affect the final performance.\n", 343 | "\n", 344 | "### Outro\n", 345 | "\n", 346 | "We will use the code you just wrote extensively in the next week of our course. If you're feeling that you need more examples to understand how experience replay works, try using it for binarized state spaces (CartPole or other __[classic control envs](https://gym.openai.com/envs/#classic_control)__).\n", 347 | "\n", 348 | "__Next week__ we're gonna explore how q-learning and similar algorithms can be applied for large state spaces, with deep learning models to approximate the Q function.\n", 349 | "\n", 350 | "However, __the code you've written__ for this week is already capable of solving many RL problems, and as an added benefit - it is very easy to detach. You can use Q-learning, SARSA and Experience Replay for any RL problems you want to solve - just throw 'em into a file and import the stuff you need." 
351 | ] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 3", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.6.2" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 1 375 | } 376 | -------------------------------------------------------------------------------- /week3_sarsa.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## On-policy learning and SARSA\n", 8 | "\n", 9 | "This notebook builds on `qlearning.ipynb` to implement Expected Value SARSA.\n", 10 | "\n", 11 | "The policy we're gonna use is epsilon-greedy policy, where agent takes optimal action with probability $(1-\\epsilon)$, otherwise samples action at random. Note that agent __can__ occasionally sample optimal action during random sampling by pure chance." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Starting virtual X frame buffer: Xvfb.\n", 24 | "env: DISPLAY=:1\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "#XVFB will be launched if you run on a server\n", 30 | "import os\n", 31 | "if type(os.environ.get(\"DISPLAY\")) is not str or len(os.environ.get(\"DISPLAY\"))==0:\n", 32 | " !bash ../xvfb start\n", 33 | " %env DISPLAY=:1\n", 34 | " \n", 35 | "import numpy as np\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "%matplotlib inline\n", 38 | "%load_ext autoreload\n", 39 | "%autoreload 2" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "from qlearning import QLearningAgent\n", 51 | "\n", 52 | "class EVSarsaAgent(QLearningAgent):\n", 53 | " \"\"\" \n", 54 | " An agent that changes some of q-learning functions to implement Expected Value SARSA. 
\n", 55 | " Note: this demo assumes that your implementation of QLearningAgent.update uses get_value(next_state).\n", 56 | " If it doesn't, please add\n", 57 | " def update(self, state, action, reward, next_state):\n", 58 | " and implement it for Expected Value SARSA's V(s')\n", 59 | " \"\"\"\n", 60 | " \n", 61 | " def get_value(self, state):\n", 62 | " \"\"\" \n", 63 | " Returns Vpi for current state under epsilon-greedy policy:\n", 64 | " V_{pi}(s) = sum _{over a_i} {pi(a_i | s) * Q(s, a_i)}\n", 65 | " \n", 66 | " Hint: all other methods from QLearningAgent are still accessible.\n", 67 | " \"\"\"\n", 68 | " epsilon = self.epsilon\n", 69 | " possible_actions = self.get_legal_actions(state)\n", 70 | "\n", 71 | " #If there are no legal actions, return 0.0\n", 72 | " if len(possible_actions) == 0:\n", 73 | " return 0.0\n", 74 | "\n", 75 | " max_value = self.get_best_action(state)\n", 76 | " average_value = np.mean([self.get_qvalue(state, action) for action in possible_actions])\n", 77 | " state_value = epsilon*average_value + (1-epsilon)*average_value\n", 78 | " return state_value" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Cliff World\n", 86 | "\n", 87 | "Let's now see how our algorithm compares against q-learning in case where we force agent to explore all the time.\n", 88 | "\n", 89 | "\n", 90 | "
image by cs188
" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 3, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "\n", 103 | " This is a simple implementation of the Gridworld Cliff\n", 104 | " reinforcement learning task.\n", 105 | "\n", 106 | " Adapted from Example 6.6 (page 145) from Reinforcement Learning: An Introduction\n", 107 | " by Sutton and Barto:\n", 108 | " http://people.inf.elte.hu/lorincz/Files/RL_2006/SuttonBook.pdf\n", 109 | " \n", 110 | " With inspiration from:\n", 111 | " https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py\n", 112 | "\n", 113 | " The board is a 4x12 matrix, with (using Numpy matrix indexing):\n", 114 | " [3, 0] as the start at bottom-left\n", 115 | " [3, 11] as the goal at bottom-right\n", 116 | " [3, 1..10] as the cliff at bottom-center\n", 117 | "\n", 118 | " Each time step incurs -1 reward, and stepping into the cliff incurs -100 reward \n", 119 | " and a reset to the start. An episode terminates when the agent reaches the goal.\n", 120 | " \n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "import gym, gym.envs.toy_text\n", 126 | "env = gym.envs.toy_text.CliffWalkingEnv()\n", 127 | "n_actions = env.action_space.n\n", 128 | "\n", 129 | "print(env.__doc__)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 4, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "o o o o o o o o o o o o\n", 142 | "o o o o o o o o o o o o\n", 143 | "o o o o o o o o o o o o\n", 144 | "x C C C C C C C C C C T\n", 145 | "\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "# Our cliffworld has one difference from what's on the image: there is no wall. \n", 151 | "# Agent can choose to go as close to the cliff as it wishes. 
x:start, T:exit, C:cliff, o: flat ground\n", 152 | "env.render()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 5, 158 | "metadata": { 159 | "collapsed": true 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "def play_and_train(env,agent,t_max=10**4):\n", 164 | " \"\"\"This function should \n", 165 | " - run a full game, actions given by agent.getAction(s)\n", 166 | " - train agent using agent.update(...) whenever possible\n", 167 | " - return total reward\"\"\"\n", 168 | " total_reward = 0.0\n", 169 | " s = env.reset()\n", 170 | " \n", 171 | " for t in range(t_max):\n", 172 | " a = agent.get_action(s)\n", 173 | " \n", 174 | " next_s,r,done,_ = env.step(a)\n", 175 | " agent.update(s, a, r, next_s)\n", 176 | " \n", 177 | " s = next_s\n", 178 | " total_reward +=r\n", 179 | " if done:break\n", 180 | " \n", 181 | " return total_reward\n" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 6, 187 | "metadata": { 188 | "collapsed": true 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "from qlearning import QLearningAgent\n", 193 | "\n", 194 | "agent_sarsa = EVSarsaAgent(alpha=0.25, epsilon=0.2, discount=0.99,\n", 195 | " get_legal_actions = lambda s: range(n_actions))\n", 196 | "\n", 197 | "agent_ql = QLearningAgent(alpha=0.25, epsilon=0.2, discount=0.99,\n", 198 | " get_legal_actions = lambda s: range(n_actions))" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 7, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stdout", 208 | "output_type": "stream", 209 | "text": [ 210 | "EVSARSA mean reward = -26.54\n", 211 | "QLEARNING mean reward = -91.99\n" 212 | ] 213 | }, 214 | { 215 | "data": { 216 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYMAAAEICAYAAAC9E5gJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJztnXd4FFXXwH83nZAECL1D6E16sYBB\nELALNrCBDRv6Yi9YELvv66sfdhRfFQsoNkQUAQkiSO+9l9ACBFJIT+73x8xmZ3dnk00jZc/vefbZ\nmTt3Zu7dnbnnnnPPPVdprREEQRD8m4DyLoAgCIJQ/ogwEARBEEQYCIIgCCIMBEEQBEQYCIIgCIgw\nEARBEBBhIPgxSqmnlVKfmNstlFJaKRVU3uUShPJAhIHgt2itX9Fa31ne5fCGUqqbUmq1UirN/O7m\nJV+oUmqqUmq/UipFKbVWKXXJ2S6vULkRYSAIFRClVAjwM/AlUAv4HPjZTHcnCDgIXAjUAJ4FvlVK\ntTgrhRWqBCIMhEqBUqqRUup7pdRxpdRepdSDlmMTlVIzlVIzzJ7xGqVUV8vxJ5RSh8xj25VSgyzn\nfVnA/WYppRKVUruUUne53e9bpdQX5jU3K6V6lXKVYzEa+be11pla68mAAi5yz6i1PqO1nqi13qe1\nztNazwb2Aj1LuUxCFUaEgVDhUUoFAL8A64HGwCBgvFJqqCXbVcB3QDTwNfCTUipYKdUOGAf01lpH\nAkOBfT7c9hsgHmgEXAu84hAiJlcC04GawCzg3QLKv0EpddrL530vp3UCNmjXeDEbzPQCUUrVB9oC\nmwvLKwgORBgIlYHeQF2t9SStdZbWeg/wMTDSkme11nqm1job+C8QBvQDcoFQoKNSKtjsPe8u6GZK\nqabABcATWusMrfU64BPgFku2v7XWc7TWucA0oKvNpQDQWp+jta7p5XOfl9MigCS3tCQgspCyBwNf\nAZ9rrbcVlFcQrIgwECoDzYFG1h418DRQ35LnoGNDa52H2avXWu8CxgMTgQSl1HSlVKNC7tcISNRa\np1jS9mNoJQ6OWrbTgLBS9kRKBaLc0qKAFJu8QL4GNQ3IwtCGBMFnRBgIlYGDwF63HnWk1vpSS56m\njg2zUWwCHAbQWn+ttb4AQ6ho4PVC7ncYiFZKWXvhzYBDxSm8OaaQ6uXzoZfTNgPnKKWUJe0cvJh+\nzHxTMQTkNaaGJAg+I8JAqAysAJLNgeBqSqlApVRnpVRvS56eSqkRZu98PJAJLFNKtVNKXaSUCgUy\ngHQM05FXtNYHgaXAq0qpMKXUOcAdGOaXIqO17qS1jvDyucfLaXFmOR80XUcdPf0/veT/AOgAXKG1\nTi9OOQX/RoSBUOEx7fJXAN0wvGROYNjwa1iy/QzcAJzCsO2PMHvHocBr5jlHgXoYJqbCGAW0wNAS\nfgSe11rPK4Xq+ITWOgu4GrgVOA3cDlxtpjsmzP1mbjcH7sb4fY5atI6bzlZ5hcqPksVthMqOUmoi\n0FprfXN5l0UQKiuiGQiCIAjlJwyUUsPMCUC7lFJPllc5BEEQhHIyEymlAoEdwMUYLoArgVFa6y1n\nvTCCIAhCuWkGfYBdWus95oDYdIwZpIIgCEI5UF7hehtjmSSEoR30tWZQSo0FxgJUq1atZ9OmTSku\neXl5BAT43/CI1Nu/kHr7F77Ue8eOHSe01nV9uV55CQNlk+Zir9JaTwGmAPTq1UuvWrWq2DeLi4sj\nNja22OdXVqTe/oXU27/wpd5Kqf2+Xq+8xGk8lhmjWGaLCoIgCGef8hIGK4E2SqmWZnz2kRiRHwVB\nEIRyoFzMRFrrHHN6/VwgEPhUay3hdgVBEMqJclvvVWs9B5hTXvcXBEEQnPjfELwgCILggQgDQRAE\nQYSBIAiCIMJAEKoUeXma0goxk5Gdy4b40+Tk5gGQlZNXKtf1d
i/H9187jrP2wKkyu5dgT7kNIAtC\nRSUnN4/Fu05QLzKUTo1qFJhXa83+k2k0iw4nIMBuLmXB52bl5nEwMY1lexK5tmcTwoID84+fSM1k\n2Z6TXNalId+timfynzt5aHBbrunZxONaGdm5/LwrizG/Gz4Z391zLr1bRAOQlpXD3ztPUCcylB7N\nanktT0JyBt+vOcQfW47SvkEU36w44JHnovb1+OiWngQoRaBNfTOyc/lw0W5mro7n9/EDOHQqndNp\nWUz4aRNZOXmMu6g1p85kMbJ3M6KqBbHmwCle+GULG+KTaFQjjOSMHFIzcwDY88qlXn/T3DxNWlYO\ny/cksnBfNv3zdH55tNbM35pAp0ZRZOfmkZ6dy7LdJ9l4KJlXR3QhISWDoIAAGtQIy8//184TdGlc\ng+jqIV5/H29sOZzMpkNJDOpQj+jqIbguTuf5+7w1fweLth9n/OC2DOvcoMj3KytEGAhF4tSZLGqG\nB9s+8Fk5eXy4aDdf/LOfPi1r8fLVXahVjJerLDiYmMbBU2mcG1Pb68uqtWbb0RTu+Gwlh5MyAHj/\nph5c2qWhR97MnFyyczX3fbWGv3Yc57bzW/D8FZ18Lk9CcgZXvbeEI+Z9AP7acZx3b+zB9JUHeGn2\nVrLMHvk41ubnefW3rR7C4J/dJ3nyhw3sP+lc6fLTv/fSu0U0O46lcOfnqziQmEat8GDWPjfE5dyf\n1h5i/Ix1AESEBuU3xGsPnKZ9g0i2HXVdcvnPbQkMenMRJ1IzueXc5jx1SYf8Y3HbE3jmp03EnzIW\nWuv8/FyPej8+c4NZj22c16o2S3efJDzEEICHkzK4oVdTFm5PICElk0e/W89V3RtzYVsjmkJObh5P\n/rCRmavjqVEtmKR0Z327rDrIyD7NOJacwehPV3iU28H3a+Lzty/uWJ8Pb+7JY9+t54e1xoqmV3dr\nxF87TzCkY32mrzxIv5hovrmrn8szo7UmJ0/z6pxtfLpkr8v129WP5Pfx/Tmemkm9yDCXY9uOJjN+\n+rr8ss3dfNRDGCRnZPPwjHW0qhvBE8Pas3xvIseSM7i6e2PKmkqxuE1FD0ex+3gqmw4lcVW3sv/D\ntNYs3J5A1yY1qR0RChimgaW7T7L7eCqDOtSjSa1woOB6J2dks3TXST5bupdJV3Wmbf1I23wOMnNy\nue/LNSzYlsD9A1vxyMXtUArSs3MJUIqTZ7J48vsNLN55wuW8n+4/n48X7yEqLIhXR5zjcuxIUjr1\nIsMIDFBk5+YRHOi0WmZk5+b3ktcfPM1P6w7x1fIDjB/chvtiWxdYVmu907NyWbkvkVs/XQFATJ3q\nzHrgAiJCjX7QodNGz7VV3QjaP/s7AA1rhLk00gDTx/ajX0xtTqRmcuU7f+cLCyuvjejCyD7NCiwb\nwNJdJ7jj81Wkm6aRQe3rsWBbgke+BlFhHE027vPgoDZMXrATMHrMx1MzqR8VxqIdx7nz85U0rlmN\n62PyuOnSAXSb9Adaw+XnNGTR9uOEBAUQXT2EnQmpdG4cxUXt6jGgbV2u/fAfl/vViwzlg5t78PfO\nk1zQpjY9mxuaxYnUTCLDgtgQn8R1budc2LYut/RrTnJGNo9+t55WdSOIbVeXjxc7G8kujWsw6apO\nfBC3m4jQoPyGF+Cxoe0Yc14L4k+lUz8qlJrhIRxLzqDvKwvy86x6ZjBvz9/Bl8s8NZVOjaLYfDiZ\n0ec25+4LW3Hjx8s4kpRBpptJ67qeTfhudbzH+f3b1PF4Zt2Z99AAjiVn8n7cLoZ1bsCnf+9l38m0\n/OPR1UNIPJPlcd6oPs14/oqOLN55gtb1Irh88mLCggP5z/Vduf0z4z+bOro34SGBNI0OJy0rhxs+\nWsbGQ0ku1+nYMIpfH7zAoxPjYziK1VrrXgVmcuQVYVAyVu5LzH9B+rSIZtqdfQgNCizkLE9Op2Xx\nxT/7OZOZQ5cmNXhv4W7+b
2S3/Eb6WHIGBxPT+L8FO/Mf3lv6NWfaMs/QI6ufGUztiFD+WLCQAQMG\nuJgeAA6cTGPAvxe6pH1/77n0bB6N1trjodNaM+7rtfy68Uh+WrPocHLzNIdOuy63O7RTfXJytW3j\nNqpPU75ZcdAjvWl0NQ4mpvPz/efz7aqDfLXceOk/ubUXtaqHcM0HS13y/9/IblzVrTEJyRnUiQj1\nMCU4/u/TaVl0m2S/UuXnt/chL09z22crPY4tfnwg9aPCGDttFXHbj9ue7+CdUd1ZvPM4364yGppV\nzwwmOT2b2hGh1KgWnJ9Pa83P6w6z9Wgy//t7H3UjQ3n3xu7Urh5K0+hq3P/1GuZsPApAbLu6XNyx\nPtf2bMLxlExSM3No3yCKaz9Yyqr9rrb0iNAgmkaHM/2ufqxdsYTY2Fhi/70wv7FqEBXGV3f1ZeG2\nBF76datH+Ts3jmLsgFZsOpTEAxe1JjIs2COPlZd/3cK+k2nM23LM49i5MbWZOqYXAUrx5bL9jOrT\njPCQQI/n6fDpdO75cjUPXdyWge3q2d7nqveWsP7gaY/0hwa35a35O3h5eGe6N61Fx0ZRtHn6V7LN\ntj8sOICv7+pH50Y1WLg9gYva1yMoQHEqLZuPFu3m3Fa16RdTm/fjducL1wcuas34wW15fOYGElIy\nOHQ6nQZRYTSoEcYPaw55lMHBi1d3pmFUGAPb1yMwQHHHZys9nvva1UM4aQqKGtWC+WXcBTSrHU6L\nJ391ybfgkQt5+oeNrNyXSJ6lSW4WHc7c8QOoFuLZpogwKAYlEQZ5eZr//LGdns1rMahDfX5YE89T\nP2xk66RhxJ9K5+r3l3j0CmY/cAHtGkS69HQLYvX+Ux4NHjgbvU2Hkrj8nb99LvPsBy5g6e4TvDJn\nGwBPDGvPvbGtAEjJyKbni/PzTRBWxpzXgs+W7qNBVBj/PHURiWeyyM7V3PTJMnYfP8Pjw9rx147j\nLNuTaHvfN645h+t7GyGn5m85xp1fFP8/A+jatCZnMnPYlZDKJZ0bELf9eH5v2vGSxdStzgc39eRA\nYhozVx8kMyePtKREerVvwftxu/OvteqZwXyyeC8fLtrt7XaM6N6YCZd1yNe4ABZuT+C2/7kKjPsH\ntmJIxwY0qlmNupFGXveXG4wXfNa6w/y+6SgXtKnD1L+N3nKz6HBmjTufmuGuJrQdx1KYte4wDw5q\nQ0iQ57OzeOdxbpm6wiP9n6cuomGNavnPubVnPfOec+nVIpqk9Gwen7meuZudjfijQ9py14CYYnVe\njiSlc+6rf7qkLX96EPWjwrycUTS01jz5/UZmrDI6D23rR/Ds5R3p38YzAKf1t3/3xu5cfk6jQq+/\nMT6JK979m9b1Ipg7foDt+MfeE2cY+J84j/SYutX55q5+HnVNy8rh4Rnr6dMymkmzPZdm+erOvpzf\nuo5Hma08eUl7RvVuxmu/b+Xank05p0kNr+2ICINiUFRhsCshlVZ1q6OUYurfe3lx9haaRYfz6NB2\nPPiNYb8d3r0xP687RPXQICaP7M4zP23y6CW3qludBY8UfF/HQ+krfVtG8/HoXpwz8Q+aRYfz+LB2\ntG8QSZ2IUP5vwU7+t2Sf7Xn7XrsMgPu/WsOvG48weVR3hnaqT6BStJ7wW6H3rRYcyJZJQwF45qdN\nfLX8AC9e1Yl+MbVpUCOMoIAAj97LlL92M2/LMT4Z3TvfdAFQKzyYO/vHsO/EGb5bHU+TWtXy7cwA\n1/dqkt/TBvjfmN4MbF+PncdSuPitv3z+rQAGtqvL+zf1pFpIIFprFu88kW8yAvjtX/35fnU8v206\nysJHY20b4W1Hk7n5kxWcSM3kjgta8uzlHT3yHExMo/8brtrW4A71mL/VU0NyNNDF4UhSOtuPpvDP\nnpN8tGgPr47owijTNGV9zjNzcjmYmEbreq7mP601F7y+kEeGtGVED8+B6KKQkZ3L3ztPcOc
Xqxg7\nIIanL+1Q+ElFvP57Cw3TTEED+S9+OY/aTVoyqH192jUo2NxZVPadOMPfu05waZeGRFcPYUP8aTo0\njCq0o3cyNZOeL83P3w8OVOx8+dL8/YTkDO77ag3/GtzGRcDveOkS22fQDhEGxcBXYaC1ZuSUZSzf\nm8gNvZpy/8DWDHl7ERnZ3l3qHhzUhocvbgvA6E9XsGiHq1lh24vDXMw0J1MzufLdJdSNDOXycxrm\nq+73XNiKJy9pT2ZOLgnJmR4Nyxe396F/mzoFeipk5eTR9hlnw35zhxAWHA7gSFIG+167jEU7jjP6\n0xXcfWGMy8Cf9bwezWqy5oCner7sqUH53hfFQWvN5sPJdGoUZVuHncdSeOfPXTw6pJ2HGu0QZAAX\nvRnHnuNnfLpn7xa1+Pz2PoSHuPpJOK49+4EL6Ny4YG+honDzJ8v5e9cJ2tSLYGdCqsfxaXf0oXpo\nUIEePUUhMyfXpVdfXqGcjyVnlJpGUBwqcgjrvDxNWnYuIYEBXhv5TYeSOHkmiwGFvN/ulLYwEG8i\nC/O2HGP5XsMEMmPVQX7bdITs3IKF5d0DYvK3/3NdV0ZO+Ycx57fkn90nmLPxKDuOpXBOk5okpGSw\nYm8i4742NItDp9NZZ9pEP7utN7Gm7TQ0yBhMevbyjrw4ewv1IkO544KWDGhb+PoU1odtwSMXcnDz\nKk4FR/HrhiPsP3mG537eREyd6vnCy3re7lcuRWtNUGAAD36zllnrD3NNjyZ0aBjJzf2ae4w7FBWl\nVIENb5v6kUwe1T1//5oeTdhxLIXv7z3PJd+I7o35zx87+On+8/li6T7qRYXx4aLd3D+wFbef35IF\nWxN4+scNfDPW6Vrpzs6XLyEnV9vaYUvCl3f25c9tx+gXU5uOz7l60vRpGW1r4igJxTHvlAXlKQgq\nOgEBKt9ZwRul2SEpCaIZmCSlZdN10h8e6RGhQVzRtSHzthxjzoP9qVU9hA3xSfxr+lqmj+2X77nj\nzoq9iVz/kTGwPPOecz08Nxy8MrwLN/b19EDJzdMknsnKt0n7yrajyaRn5dK9WS3i4uI4Eh7DUz9s\nzD/+9Z19Oc+0W1ZGMrJzOZCY5uL9lJenXQaRK0JP8e5pq9AaJl3VmaBARZ2Iov2PxaEi1Ls8kHp7\nRzSDYvDwt4avdWRoECmmrzXArHHnE1M3gleGO71sejavxd9PXFTg9Xo1d5oCrIIgKECx/vkhXPPB\nUlrVi7AVBACBAarIggCgfYMol/1zY2rnb/dpEV2pBQFAWHCghxtsUSd7nQ0+usWn908QKgwiDIDf\nNx3Jdwlb9exg2j1j+Jt/cmsvYupGABTJlgf2DdT9A1vx2ND2xj3HDyhJkX2mdoTTY+XjW6WBEgTB\nHr8XBhnZudzz5RrAmCQSGhTItheHsfVIMt1LONC346VLuPq9JWw5ksz654e4+J2fLSLDgnl5eGcu\nbFuXGuFn//6CIFQO/FYYnEzN5KI3F9GjWc38tCeGtQMMU0RJBQEYA7Nz/tW/xNcpKTf1bV7eRRAE\noYLjt1FLf998lKT0bBaaM0zfGdXdYxKQIAiCv+CXwmDL4WQm/LjJJe2KroXPWhQEQaiq+J0wyMvT\nXDp5sUvaOxb/dkGoVKSfhp3zC88nVHxSE2DacJhYA15vcdZv73djBiv3ecbVGVDKk4EE4azxujke\ndI/vIU2ECso/78JuM95T+tlf3MfvNIPDSekeaeJlI1R6vr+rvEsglJRsS9sUEnHWb+93wmD7USNm\nzJd39AWMAGyCUGL2LoakeMjOgEOrS3atHM/Y+LYkWEJSH99KQG4mTO4BW2eX7P7FJS8X/njWMF0J\nvjP7YcM0tGKKM61u+7NejKotDNIS4YPzqXfMGely9/FU2tSL4II2dVj33MWsfGZwORZQqDRkpsCM\nW+C053oMnNwNn18Ob3WCL66Ejy+CY5uLfo9fxhuNwkt
1YffCwvO/389ld8Di6yFxN8y4CQ6vhbwS\nrlmcmwMfD4IzJ+H4dvu6W5k9HpZOhndLaXLjtl/LxVziws75sParop+3/Xf4v66Qk+k9z8EVkJEM\nq6a6prcZCnnZ9ueUIVV7zEDnwbFNBEU5g53N23KM2HbGGIG4kgoAZCRBaBQUNMt82xzYOgsCg+Ha\nT12PfXC+c/vgcuN79Wdw6b99L8NEt2Blm2ZCRH2jhxhg02crLKbYlFgY9Bz0f8T3Mrgz/UY4tAo+\nOA9SjYV3aDMERn4DgTZNR7a5+tuZ4/BuHzixHQJD4NmCFwiy5eRu4/4AsU9B/0ft71lW5OXC4v/C\nwpeM/e43eeY5sh6Cq0Mdc+W9vDzYNR++vg5CIiErBc6cgBo2KyB+NwY2/+iZ3vM2YyA5L7fUquIr\nVVszcGPvCSP0cWGrVwl+xCeD4bVm8L9LC34Bc80enl1PL8dzHIrGPV33z5w0NFV34ld7CgKAtV/C\nB+fClyPsG/5UyzoJ/1pvX+YFk5zbhQmPHX9A3Guu+XaakVcdggBg5x/Gxw5laU5ObDe+c7MK7h17\n450ezu24V2HxfzzzZKXBxBq02/aO9+scWGYIlqLyfj+nIABX011ennHvjwbAu5b/+bPLDEEAhiAA\no0Nqh50gABg80RB6eTn2x8sQvxIGp9KMP/TBiwpeQ7fSkZsNy6cY34LvHN0E8eYqZgeWGr15O/Yv\ndY4DBJraZF4uJO61zw/w492u+/+OgX+3ck3LyYJPCg54yJ6FhrnESvopeK+3sT1wAtRqUfA1ACbV\nhq9v8H786+uMRne6TQ/YnVM29V74CmyYbp//pXr2Ag8M4bPt18J7wnGveqadNpZHbXh0PiRssz/v\n06GugsVKXh78+gi8aWOfP7HDdX/xm87tSbXglYbO/SMbDOFwwHO1QnJ9HP9xUK0mbPkZjnupTxlS\npYXBabPx333KeNASU439QR3ql1uZyoTlH8Jvj8Gaz8u7JL6Tftq+p2zl1D7Dbl1WpJ103c+26eHn\nZMH/LnEKigDT4eCbUTC5m9HrDAiG7rcUfj+dBwctS2gunex6vGlfePqw53nZaa77U2IN0xZA+8uN\n73FeBq21Nho9nQs7fvc87hj0dbDdIniqe3G5jnIze2gNi163z+ueD4zfbGIN47NxpmEOWvmJM5/d\nf97jVs+0v99ybp/c6Xos/RSkWDQa67N2ZD1s/slo1Fd+AilHXM+1E0yLXrOvE8BH/Q0Nzo6lk416\nnthlXxYHzyXCBLd1pTf/5P2eZUCVFgZ55sOXYz6DJ1INdbVOMUJDV2iSzCUi7RqzisrrzeGNls79\ngytdB133/2MMwP18f9mVIdltsfNqNvGo3PM4GgqHCeXwWmOwr2FXz3MdDd6BZc60Q5Z1Of580bn9\n2G644w8IqQ6j3byBAt3GtqwaYE1jzWnqtGZtt1dhnNu6H3k5Rhm9cXSDp1ACQxCfcTOn1jTnNLib\nfdZbNILGPeGO+dDhShg1wzVfTobRwFl76vOeM75Tjho97HVfw8cDPcuz5gvPNKsm4i64Xm8Bb7Zz\n7luftY8GwHejXfNbzWO/Pe7cHm+JVFDQgPwB+/VK8jsRy96H7+80tBlHWXrdAcM/guunGZ2MYHOR\noJpmWPstIgzKDIcwqF29BAPHvi4GdHCFc0CtrMkyl1hc+u7ZuV9p4njBpg42BiodLHzZ+PZmevBG\nyjHC0o8WnGf/P4Y9/ad7XdN/e8L4PrUPpo2AzFSYOsQ1T9oJZwMG8P0dxndBz8WnQ53bDmGS7NYb\nrW5ZZ6LFBa7H3IW8VUCFOtd2SKrZEeq0gYlJEPu0kfjdGPjlX97LZmdPT0uEFR97po8xtYZcN2Hw\n0z3O7TvmQdPecMM0aDfMNV9WmmcjnGJqQn//1+hh/3SvIaDssDbG7gLOao7x9l9s+M51HMXK9jnO\nbYeW0u5Sp7AFQzi
eOWF/fmGsmgobv4O3uzjTzr0fuo6Ejle65u1uakG+mP9KET8TBllEhgUVfwnH\nnCx4oSbEvQ7zJxqugHYkH4apF8MvDxa7rEXCYTJIPeq7sKoouJtAAI5uhH2LPdN94c229FtusdfH\nrza0i0zLmsT/G+ZqA3aQlWL05P+vK+xeAK82hjNuC9rv/QuW/J/nudYXuv+j3sv3z3vGf7TsPWda\nDbcFjpQyPGgGmULHboC6MA4bYdnZNhuOmSvdRbeCrb+4zk84tc/z3DdaGrNhAW78zpkeZGrUVs3A\n2ig/fdhpRrMjy3Nd6CIRbzGxTYl1PZZ+yjnI680d9Yc77f93gN+fhHg3rcrhDXbhk8b3m21dx33G\nLoKgaq7ndL4GnoqH63ww2bqb2xwMMJ+fgLM7GdavhMHx1EzqFmf5wZxM4wX+1pTYf//XsFeu/p9n\nXq1hqtkT3DDD87jLdbNKPkEJnGYiMDSScnBLKxLWwcS9i+DNDs79F2rBojc8z9Ea9izyXdjlZhua\n2ScXGZ45Kz7ynvd5HyZJPVmIj31kA3gmAZ48ABc94z1fymGjQ7HU9IB5eCuMW+mZL/ZJw4wAMO95\nWGW6s6aapptaLeDhAgYZ7QYuc7Ngxs2u8xNO7/d+DTDqBdDlOqe56s+X4I0Y4388vt2ZN6S65/lj\nF8EwczzBHPD1mVt/hrYW7WLWOON7l00spm9vhU9NLS7ZZtylME4fgE8GGeMV1WpB7zuhRhPjWHA1\nz/xXvgONurm6jcbEGm7HoZEQ5sO6xg6zkDtKGUKmOJ2AEuBXwuBESmbR16JNP2V4Q7zZDnb8ZqTl\nWMw/Z04aqnic+cAv/xCSfHzoF75kTFA6vqPwvO6c2ud86K29u0+HuNqiy5oDy2C9KfT2LjZmUxZk\nW3VoMQ6m3+g0FYAxyLp1lmuevDzY8K0xocs60OiO1Sz3Yh1XQeuws7t7DNVpW/D8AoAW/SEsCrqO\nKjhfUKjRCNhdb8jL9udENfLeKDgaocxkmP2Qse3ogDTtB1EN7c8DOGekZ5p1QPX1FoYpzGqL7zTc\n85yG5xi93CsmQ5BZzozTzsH3beb4RpCXOjTqBtExxra7iaggJhw1GtcbLR2qEzsMofjlNc60yy2D\nyIfXGgLKXRjY1Qug641w33LXtBdrG++8QwiCoalaCQh2Dmhf+IR9WWJi7e/p4MF1BR/PSTc6DDvn\nFZyvFPEvYZCaSZ3IIo4XOLwAUi0j/SGWNXiPbjB8huNeMfZ/f9L7tTKSXBt+h7nBfZDSF/6vK/y3\ngzE7Mv2Uq4+31cuiIA6ugJm3F68n5eDTofDjWKNH9fnlhm30+9u95y9sFqsdvzwIv5sv3ZxHvQvP\n9/u67n92qXPb8fu4289v+o6iYMg4AAAgAElEQVRCcQwsB1gmPY2w2NRv+NL7uY26Gw3leeMKv487\n7gPHa76A4HBje+grBZ/b1caN1DqrNf2UYQpz0PdeaHeZ/bU6XQ0h4U4zkZWtvxjfhTVu4Oq9Fe5c\nm5sJRw1vGge3/uzaG59gEWIOoeighc3ysSluz/MFD3nmeXgbDP/AmERoR/V6zu3L/+t6rKtF0Ha8\nyvi+5A2n0IOCOxgXPQvRLb0ft/LVtb7lKwX8TBhkFV0zSD3mmeaYUAKeJhn3mCJnLC/A1CGGf3hq\ngquppKjC4DXLymU/32d813abO+HLNP6pF8Om7w2hUlKSLaYqbxNqwOmFUxTWTnOtz3u9jbEAB5t+\ngG9H29u/HSz00jN32Oub9rM/Dk6V36rVhEY5t+t39jxn1Ay48Vu4c4HTxHTPEu/3sMO9QZn1gNGA\nB1eH8OiiXWvgBO/Hhr0Ol7wGnb24R3orj5WCtBTt9o7ExLoK0OBqrmMNMbGu+b1pHZ2G2zfmvz/t\nlmBTbkd53QWugwiLMAirYQzKdzVnRO+yCNGgUONYX7d5JfnHw+CmmXD7H3DODVAt2
jkmUMEokTBQ\nSl2nlNqslMpTSvVyO/aUUmqXUmq7UmqoJX2YmbZLKVVAN7p0ycrJIyk9uxjCoBDPFHf/5gC3KfOZ\nZgOitXMiya8Pu+bJSPa9PBnJhpruzsVuXhKF+fC7+3IXZ5aoFavtuEkf4/vIBqeN28HGmcb3hTZ/\n/W1ufvB1CxBS1oHdmbf55oYXbzM+4wj1cNDi/nn5W3D9F05twuHLb+3Zth1qvNhgPxDYbpiRJyAQ\ngswGp0Fn1x5n22Ge5xXG7j8h+0zhpi2Azmav8uGt9vZ8Bw6PpIBAiLQ06mMXeeZ1jGMUhdYXu+7f\n+jM0Pw9aDYJBzxd+vre6XjPVvjHPPuO67/DUin3K+LZqVd6EgfV/cpBpvqfdCjEXOrjtN3hgDbS5\nGJr1hRFT4IkCJip6I6WQNqiUKKlmsAkYAfxlTVRKdQRGAp2AYcD7SqlApVQg8B5wCdARGGXmLXNO\nnjHnGBRVGBRmQrGahXIy4Zjpl9zcdA90NChWX3OHau2gKLMUvZWnQRfX/SNuanvWGVctJt1NWPgS\nGM2B1kZ+qwD5+nrndosL4LvbDFfB/7Q2BhwdJGwxvu1U9yaW/sTtc+HWAhp4R+OW6emhkhzZxv4c\n99m+EZbJh73NENAPbYFetxvq//On4KlDThdJq4BVynjZL3vT2dj7wrgVhknlniUwyge32YlJhefx\nxrVTjfOjGjl/L2XzylvdWtuYg7DjNxn2fnfcg6qBd68YB95iCt3yA/S3dIyu/sBeANlx3WeG8PLW\nmDsICDbq/8R+w74/Mclw6cwvm0WzsP7WETYT7hwaYHsv5jR3mp9nH5fIFx6yzLl5q1PxrlFESiQM\ntNZbtdbbbQ5dBUzXWmdqrfcCu4A+5meX1nqP1joLmG7mLXOOpziEgeXhcUwKspuUkxRvDIxa/cHD\nazt7iXY9B0fo3kbdod0lxvaeRcaEqv8V0Av0NYxEZqp3c0dEfQi3vNQzLXb7vDx4pZHhSeLAfULR\nNwWEKrCSl2t4XUy72hi3sOPkLtj8g3P/r38bv0GSxRwWHAZj44yXGgxVPDDY8NJ4cB0062cM4nmb\n2esQRK96vmzru5paUkgkDJ9iHxt+YhI8ahl7uPTfxgxQ95c31HLuOabAe8L0wKnX3vA6KQrVahn2\n4gadfevdA1zqFpfnzgX2+QrC0RGwi5VT39LYXPZfeHyvq3+9FTuN7l9e5gVYiYktPE+3G+0FEMCj\nO41xjZrN4OYfoOPVRnphXjuOyYDVatr/3u6avAO793vAY8aEOve4U2VBjSZOzfEsxSkqqzCAjQFL\nV5h4Mw3goFu626ifgVJqLDAWoH79+sTFxRW5EBlnTjMMyMnJ4c+lhg/xgZ2biTu+jYDcTBxDTwfn\nvMnu1q4vdWycp4yK6/0xsYtMu6q7/zmwbfY7tAe2R5xLw38+Iwp8mmuwb88O9uX9ScyeaRxuNIyM\navbhMtpte8eIw+LGsr4fkrF4CfSZSo3Tm+m+zrCZpqamEhcXR9d1E6gFsH1O/u9Y89R63F87X37j\nuglL6OTw0nGfxu/A3RsIYOpgNnZ+Bof+4rxXLQIv+AatAsiLiwOawYb9gNHgqsiriOzeiR5rXRuh\nTetWcfKg4kK32yRFdSApI4+42J+NhFPQssEwmh+YmZ9nT8ubOVCM5wndnYD+35G33IfB0lKlDcT+\nnP9Mxu1KhV1xHrkc/7cdbbfPxbHK9+oe/6FJ/CyikneQGN2NnWt3YfTXCqdaevP8F3Zd15eITlzN\nnsWFr7IW0Phe2p6BHW3vM//nYlBtGHQbZrQa8U4NIhbIDKnNvhYjabfDmL+xtf14coLCSY7qQHYh\n9+taswvH6sdyNC6OWDMtbsky7yfsLmb5i0hMehjNgM0dH+e4TR0K+r+LQ6HCQCk1H2hgc2iC1vpn\nb6fZpGnsNRFbx3Gt9RRgCkCvXr10bGxsYUX1I
DHhEKyEoKAgGsW0gzUbGNL/XJrVDoeZTttn0/hf\naHpisWEauHKyMcAb53ax0b8Q23IAFKDFtt9u+I6369EfGtwHb9sMLFp5Yj/8pw0tmjSiRZemsOgH\nmumDcKeNH3XSIYizSe//CP0GWWyYu3LAbKsiqlcnNjYW4pxT6vN/x4mmsIuJhT1xrscKYskG2FJ4\nNju6hJqDzKNmENvOh3vlczFcdS98foUx6QvovPk16GuZ+Vq3PdwxjxphUUTExbnWZcAAmOQUBjG9\nhxLTsSj3ryDEGmaMWC+H49zrbaV3Z2PC1PCP6Nl1JGCYxRrj7KX5RGoCrABUAN2GPwBAs4LPcDJo\nmG1DUlKWZv6P82Ivpt2u+bAD6HErHa58wfcLxP5NLaA95L/3xWlvSp3z+8K2K+jU+RpbrabA/7sY\nFGom0loP1lp3tvl4EwRgyG6rntkEOFxAepmTeMawy0c7zESbZrpmyDhtBHpLPw1z3TwvarWElqYe\n4bAzD3nZe+jgiPr2arZ1evlNMw3VNTfLCCDmCHvgLb6Qe+TFx3YbNutBz7mmN3ba3YNyzIE0dzOJ\ndUq91Ybsy4Is7mMN4HSpc7dvu5tQHD7+NX1uPlwZPgV6jnHuL//QuX3vP8ZcADsCAlzL5i0AW1Wm\neh3TI8Zm/kFRcHhR+Wo3PwtkhUYbg+AdrjLmQ7ib1SorwdWgy7W+mxNLSFm5ls4CRiqlQpVSLYE2\nGP2JlUAbpVRLpVQIxiCzjU2h9Ek8k0VoUADVQwoJRfF6c9j4rWua1Sf4jj+MSTjnjTMa92eOG9PP\nrUTY2BvB1ROjjcXD4vg2ZyPr7Y+3TnQD4+Vufp5nvmo18zcvWHITpBxzlr+OGbjL6p0QE+vcTjli\njKHEveY62Jyd4TzH3TsIDNv/MzbpF78Iz9m4uDpmdhaVqIZwmc0civEb7ReA8YZdUDnBN4LD4P6V\nrvMsKgoBAdBztP18CF95Yr9zTMjPKKlr6XClVDxwLvCrUmougNZ6M/AthkHhd+B+rXWu1joHGAfM\nBbYC35p5y5yTqVnUrh6CUgq2/1a0k4dZwtfWamFMwnEQFOLZ8/YmDLrdWPi9jm40AqlZcfeYuacQ\nG+31llmli15zzqB0DB46Bo+HT4FzH3DmdczsjHsV5jzmTP/2FmcEyHVuE6zaXmL0Fu08akLCjRfU\nMejuwFsP3hfsGv0aXgY7vRESXvz7C1C3rX2IhqpAtZouHSp/oqTeRD9qrZtorUO11vW11kMtx17W\nWrfSWrfTWv9mSZ+jtW5rHvPiGlP6nErLcpqI1n/j+4lN+0LddgXnce/Ne+uZ2IVItsN90Nl9oNbd\njdQda8/bEdMGLMLANBM16m40rnfYjEUcWGZ47GSmOle2WjPNM9+N013rbzcT1X0ORGnSov9ZU6MF\noSrjNzOQk9KzqVHN9CluNcj4vvsv7yc4sHNL9Eb9Lq7xzx+yjLQ+uM4+oqNdCAD3VZasNv5m5xZe\nDvdIivmYY/WOWcAO/3K72ayBwUZMJqvr5ixLSAX33r6D6JaG2cYaQqC2JdKjnZ97URlg0VraDvWe\nz53hHxUcOkIQ/JizuMJ0+ZKcnk29SLNhd4TSrdncGPxM3GMMrm352RmZ1IGv6vCEo4bPsnUSS43G\nxizJqEZOu32fsUZv1kHMha6rS7mTkeyc+XrJG96nvVuJauSZ1u4y4z6ZqUZcdYAwUx22m7jjPmnN\nnZFfeY8gajdA3GO0MUDfx4fyF4YyhWq9TtCvCIvflHTwVBCqMH6jGSRnZBMVZjbU6acBZdi675gP\n95prl3a8CrqYE4sck2vsltuzI7iafZyULte6DvRe+m/X2Pd9xjq3H1hjKfBhw+XztabOUAu+enBU\nq+kZjdEhcP6weEo57O+FzeJ052ZzQllRzDNXTjbGOoaWomWw3bCiDRwLguAVv9EMUjJyiAwzq5uR\nZAxiBgRA9
drGx4EjhnjddiULBeAr1gbVGvXQGjzO0ZOvVoTgZN7CItst+m4d/O0zFlZMKfjarQf5\nXg4rhY11+EqmGSgwzD8H+gShLPCLblWehrSsXKIcYwYrPvKMq+/AYYIoD2+JgnraweFF84KxRnq8\n8l0IdBvUdsROAlfNoLhzAM4mjkB9viwgIgiCT/iFMMg2nWjyNYOCuOR1IwZK68FlW6iiUlT3Sasw\n6H4zDP/Q9bjVPdYqKOzCBY+Ncw4YB1cAt0xHPJmihnEWBMErfmEmyso1Bjojw4K9z/B1ENnAiO1e\n0fA2d8Eb1pDFSrlOcgNXE4vVy6nLtcbMYZ0Hvz1ubNfrANdPg2lXwXkFLK5+tuh7t1En99DIgiAU\nG78QBg7NICI0CH68p+DMFZXMlMLzWAkM5kiDi2g40PTecV9c27oUoFLQ/xEjSqJjLoQKNMIzOwgI\ngNFuobfLi/qd4AqbRekFQSg2fiEMck1hUC0k0LdFUM42Y34tfHC4MFdPG7a3/xcNHQHhrJ5OXUd5\nxph3j3EkCIJf4RfCIMd0hw+3xiW6+oPyKYwdLSyDuTEDjXUT3APpRRawrKAvWCd72U1+EwTBr6nS\nA8jK9M7JyTOkQbVgq238ertTyp9bfzJWqHJnyEueaUXB6qnkvtKaIAh+T5UWBg5yHN5E2ZbImt6W\n4qsoPLLdWHHKQZdrS+/aTW3XExIEwY+p4C1i6eAQBvUXP1O+BSkKkWWxDIjJsAroLSUIQrniH5qB\nOWagYwYaG4Ut4F2R6HmbEUOpNCmqm6ogCFUev9IMgiNMt8lbKqBHkTeueLv0ruUIFhdcvfC8giD4\nFX6hGeTmaYICFEE5aUZCiJ82hpe/ZazKJsHdBEFwwy9ahTytCQ0KgCzHmsB+KgwCAo21YgVBENyo\n4sLAcKfM0xAswkAQBMErVVwYGGggKCAAUhOMBLt1BwRBEPwYvxAGACGBClZ+XN7FEARBqJD4jTAI\nDvKbqgqCIBQZv2khgwP9pqqCIAhFxm9ayKAAMzZPqKyOJQiC4I5fTDoDCAkKgKgmEHNheRdFEASh\nwuE3mkFwYICx2H15rG0sCIJQwfEjYaCMJS9FGAiCIHjgP8IgQEF2GgSJMBAEQXDHb4RBeECOsSGa\ngSAIggd+JAyyjI3g8PItiCAIQgXEb4RBdZVtbIhmIAiC4IEfCQPRDARBELzhN8Jg68FjxkZwWPkW\nRBAEoQLiN8IgOC/d3BAzkSAIgjt+Iwwubx1qbITXLt+CCIIgVEBKJAyUUv9WSm1TSm1QSv2olKpp\nOfaUUmqXUmq7UmqoJX2YmbZLKfVkSe5fFCJyThsb4XXO1i0FQRAqDSXVDOYBnbXW5wA7gKcAlFId\ngZFAJ2AY8L5SKlApFQi8B1wCdARGmXnLnFBtrn8syz4KgiB4UCJhoLX+Q2ttzuZiGdDE3L4KmK61\nztRa7wV2AX3Mzy6t9R6tdRYw3cxbJijL9oA9bxkb4k0kCILgQWlGLb0dmGFuN8YQDg7izTSAg27p\nfe0uppQaC4wFqF+/PnFxcUUuUOaZJIa6pcUtXgJK2eavaqSmphbrd6vsSL39C6l36VCoMFBKzQca\n2ByaoLX+2cwzAcgBvnKcZpNfY6+JaLv7aq2nAFMAevXqpWNjYwsrqgenjx+Bla5psQMHFvk6lZW4\nuDiK87tVdqTe/oXUu3QoVBhorQcXdFwpNRq4HBiktXY07PFAU0u2JsBhc9tbuiAIglBOlNSbaBjw\nBHCl1o4RWgBmASOVUqFKqZZAG2AFRj+9jVKqpVIqBGOQeVZJyiAIgiCUnJKOGbwLhALzlGGHX6a1\nvkdrvVkp9S2wBcN8dL/WOhdAKTUOmAsEAp9qrTeXsAyCIAhCCSmRMNBaty7g2MvAyzbpc4A5Jbmv\nIAiCULr4zQxkAKrXLe8SCIIgVEj8QhgEOByWet9ZvgURBEGooPiFMAgi19g
IKM1pFYIgCFUHvxAG\nwQ5hEBhSvgURBEGooPiJMDAjZogwEARBsMU/hIFyCIPg8i2IIAhCBcU/hIFoBoIgCAXiF8IglGxj\nQ4SBIAiCLX4hDJyagZiJBEEQ7PATYSDeRIIgCAXhH8LAMYAcFFq+BREEQaig+IUwCMkfMxAzkSAI\ngh1VWxiYK5qFiDeRIAhCgVRtYWAirqWCIAgF42fCQMxEgiAIdviHMFDiTSQIglAQfiEMZMxAEASh\nYPxEGIg3kSAIQkH4hTCQAWRBEISCEWEgCIIgVG1hYE4zoGXAMWNDhIEgCIItVVoYeCDCQBAEwRY/\nEwYygCwIgmCHfwkDh91IEARBcMG/hIEgCIJgi98Ig9yaLcq7CIIgCBUWvxEGqMDyLoEgCEKFxX+E\nQYAIA0EQBG+IMBAEQRD8SBiImUgQBMErfiMMVMbp8i6CIAhChcVvhEFAcnx5F0EQBKHC4jfCQBAE\nQfCOCANBEARBhIEgCIJQQmGglHpRKbVBKbVOKfWHUqqRma6UUpOVUrvM4z0s54xWSu00P6NLWgFB\nEASh5JRUM/i31vocrXU3YDbwnJl+CdDG/IwFPgBQSkUDzwN9gT7A80qpWiUsgyAIglBCSiQMtNbJ\nlt3qgDa3rwK+0AbLgJpKqYbAUGCe1jpRa30KmAcMK0kZBEEQhJITVNILKKVeBm4FkoCBZnJj4KAl\nW7yZ5i3d7rpjMbQK6tevT1xcXJHLlnkmiaHm9qFGl7CzGNeozKSmphbrd6vsSL39C6l36VCoMFBK\nzQca2ByaoLX+WWs9AZiglHoKGIdhBrJbOEAXkO6ZqPUUYApAr169dGxsbGFF9SDp5FFYaWw3btKU\nxsW4RmUmLi6O4vxulR2pt38h9S4dChUGWuvBPl7ra+BXDGEQDzS1HGsCHDbTY93S43y8fsmQcBSC\nIAheKak3URvL7pXANnN7FnCr6VXUD0jSWh8B5gJDlFK1zIHjIWZa2aPEi1YQBMEbJR0zeE0p1Q7I\nA/YD95jpc4BLgV1AGnAbgNY6USn1IvnGGyZprRNLWAbfCBBhIAiC4I0SCQOt9TVe0jVwv5djnwKf\nluS+xULMRIIgCF7xn+6ymIkEQRC84j8tpCxuIwiC4JUqLQyU1ZNVzESCIAheqdLCwAUxEwmCIHjF\nf1rINZ+XdwkEQRAqLP4jDKpJPDxBEARvVHFhYBkzuOCh8iuGIAhCBaeKCwNBEATBF/xHGCi7GHmC\nIAgC+JMwsA2YKgiCIIA/CQPRDARBELziP8JANANBEASv+I8wEM1AEATBK/4jDEQzEARB8Ir/CAPR\nDARBELziP8JANANBEASv+I8wkEB1giAIXvGfFlLMRIIgCF7xH2EgZiJBEASv+I8wEM1AEATBK/4j\nDEQzEARB8Ir/CAPRDARBELziP8JANANBEASv+I8wEFkgCILgFf8RBiINBEEQvOI/wkDGDARBELzi\nR8LAf6oqCIJQVPyohRTNQBAEwRv+IwzETCQIguCVqi0MlNcdQRAEwULVFgZWRDMQBEHwiv8IA9EM\nBEEQvOI/wkA0A0EQBK/4jzAQzUAQBMErpSIMlFKPKqW0UqqOua+UUpOVUruUUhuUUj0seUcrpXaa\nn9GlcX8fC3nWbiUIglDZCCrpBZRSTYGLgQOW5EuANuanL/AB0FcpFQ08D/QCNLBaKTVLa32qpOXw\noaRlfwtBEIRKSmloBm8Bj2M07g6uAr7QBsuAmkqphsBQYJ7WOtEUAPOAYaVQhsIRzUAQBMErJdIM\nlFJXAoe01uuVa2PbGDho2Y8307yl2117LDAWoH79+sTFxRW5fFlpyQwxt1evXUvK7rQiX6Myk5qa\nWqzfrbIj9fYvpN6lQ6HCQCk1H2hgc2gC8DTkt7cup9mk6QLSPRO1ngJMAejVq5eOjY0trKgeJCcm\nwApju2ePXtCkZ5GvUZmJi4ujOL9bZUf
q7V9IvUuHQoWB1nqwXbpSqgvQEnBoBU2ANUqpPhg9/qaW\n7E2Aw2Z6rFt6XDHKXXTESiQIguCVYo8ZaK03aq3raa1baK1bYDT0PbTWR4FZwK2mV1E/IElrfQSY\nCwxRStVSStXC0CrmlrwaviDSQBAEwRsl9ibywhzgUmAXkAbcBqC1TlRKvQisNPNN0lonllEZXJEB\nZEEQBK+UmjAwtQPHtgbu95LvU+DT0rqv74gwEISKRnZ2NvHx8WRkZBT7GjVq1GDr1q2lWKrKgbXe\nYWFhNGnShODg4GJfr6w0g4qHaAaCUOGIj48nMjKSFi1aoIr5jqakpBAZGVnKJav4OOqttebkyZPE\nx8fTsmXLYl+vSoej0C4PlwgDQahoZGRkULt27WILAgGUUtSuXbtE2hVUcWHggjxsglAhEUFQckrj\nN/QfYSCagSAIglf8RxhI70MQBMErfiQM/KeqgiBUfHJycsq7CC74jzeRmIkEoULzwi+b2XI4ucjn\n5ebmEhgYaHusY6Monr+iU4Hnf/nll0yePJmsrCz69u3LOeecw/79+3njjTcA+Oyzz1i9ejXvvPOO\nx7lnzpzh+uuvJz4+ntzcXJ599lluuOEGJk2axC+//EJ6ejrnnXceH330EUopYmNjOe+881iyZAlX\nXnklzZo144UXXiAwMJAaNWrw119/sW/fPm655RbOnDkDwLvvvst5551X5N+lqPiPMBAzkSAIbmzd\nupUZM2awZMkSgoODue+++4iIiOCHH37IFwYzZsxgwoQJtuf//vvvNGrUiF9//RWApKQkAMaNG8dz\nzz0HwC233MLs2bO54oorADh9+jSLFi0CoEuXLsydO5fGjRtz+vRpAOrVq8e8efMICwtj586djBo1\nilWrVpXdj2DiP8JANANBqNAU1oP3RknmGSxYsIDVq1fTu3dvANLT06lXrx4xMTEsW7aMNm3asH37\nds4//3zb87t06cKjjz7KE088weWXX07//v0BWLhwIW+88QZpaWkkJibSqVOnfGFwww035J9//vnn\nM2bMGK6//npGjBgBGBPxxo0bx7p16wgMDGTHjh3FqltR8R9hIJqBIAhuaK0ZPXo0r776qkv61KlT\n+fbbb2nfvj3Dhw/36rrZtm1bVq9ezZw5c3jqqacYMmQIjz/+OPfddx+rVq2iadOmTJw40WUOQPXq\n1fO3P/zwQ5YvX86vv/5Kt27dWLduHe+88w7169dn/fr15OXlERYWVjaVd8OPRlVFGAiC4MqgQYOY\nOXMmCQkJACQmJrJ//35GjBjBTz/9xDfffOPSk3fn8OHDhIeHc/PNN/Poo4+yZs2a/Ia/Tp06pKam\nMnPmTK/n7969m759+zJp0iTq1KnDwYMHSUpKomHDhgQEBDBt2jRyc3NLt9JeEM1AEAS/pWPHjrz0\n0ksMGTKEvLw8goODee+992jevDkdO3Zky5Yt9OnTx+v5Gzdu5LHHHiMgIIDg4GA++OADatasyV13\n3UWXLl1o0aJFvgnKjscee4ydO3eitWbQoEF07dqV++67j2uuuYbvvvuOgQMHumgSZYkyYspVbHr1\n6qWLM4CSfPo4UW+3NnYeWAO1W5VyySo2suiHf1EZ671161Y6dOhQomv4e2wiB3a/pVJqtda6ly/X\n8x8zkWgGgiAIXvEfM5GMGQiCUExOnjzJoEGDPNIXLFhA7dq1y6FEpY//CAPRDARBKCa1a9dm3bp1\n5V2MMsWPzET+U1VBEISi4kctpGgGgiAI3vAfYSBmIkEQBK/4jzAQzUAQBMEr/iMMRDMQBMFHxowZ\nU+DM4dLibEQj9RX/EQaiGQiCcJYpbM2CpUuXnqWSFI64lgqCUDH47Uk4urHIp1XLzYFAL01Zgy5w\nyWsFnv/yyy/zxRdf0LRpU+rWrUvPnj1djq9evZqHH36Y1NRU6tSpw2effUbDhg35+OOPmTJlCllZ\nWbR
u3Zpp06YRHh7OmDFjiI6OZu3atfTo0YPIyEgOHDjAnj17OHDgAOPHj+fBBx8EICIigtTUVOLi\n4pg4cSJ16tRh06ZN9OzZky+//BKlFHPmzOHhhx+mTp069OjRgz179jB79uwi/06FIZqBIAh+y+rV\nq5k+fTpr167lhx9+YOXKlS7Hs7OzeeCBB5g5cyarV6/m9ttvz1/bYMSIEaxcuZL169fToUMHpk6d\nmn/ejh07mD9/Pm+++SYA27ZtY+7cuaxYsYIXXniB7Oxsj7KsXbuWt99+my1btrBnzx6WLFlCRkYG\nd999N7/99ht///03x48fL7PfQjQDQRAqBoX04L2RXoLYRIsXL2b48OGEh4cDcOWVV7oc3759O5s2\nbeLiiy8GjFXVGjZsCMCmTZt45plnOH36NKmpqQwdOjT/vOuuu85l9bXLLruM0NBQQkNDqVevHseO\nHaNJkyYu9+rTp09+Wrdu3di3bx8RERHExMTQsmVLAEaNGsWUKVOKVdfC8B9hIJqBIAg2eFurAIz1\nDjp16sQ///zjcWzMmDH89NNPdO3alc8++4y4uLj8Y+6RRkNDQ/O3AwMDbccS7PKczUCi/mMmEs1A\nEAQ3BgwYwI8//kh6eiU3RX4AAAceSURBVDopKSn88ssvLsfbtWvH8ePH84VBdnY2mzdvBoyooQ0b\nNiQ7O5uvvvqqTMrXvn179uzZw759+wBjCc6ywn80AwlHIQiCGz169OCGG26gW7duNG/ePH/ZSgch\nISHMnDmTBx98kKSkJHJychg/fjydOnXixRdfpG/fvjRv3pwuXbqQkpJS6uWrVq0a77//PsOGDaNO\nnToFrq1QUvxnPYPH90J4dCmXrGJTGePblwZS78pDRVvPYOLEiURERPDoo4+WyvVKg9TUVCIiItBa\nc//999OmTRseeughWc+g2IiZSBCESsjHH39Mt27d6NSpE0lJSdx9991lch//MRPJALIgCIUwceLE\n8i6CBw899BAPPfRQmd9HNANBEMqVymCqruiUxm/oP8JANANBqHCEhYVx8uRJEQglQGvNyZMnCQsL\nK9F1/MdMJJqBIFQ4mjRpQnx8fIlm1mZkZJS4IayMWOsdFhbmMYmtqPiPMBDNQBAqHMHBwfmza4tL\nXFwc3bt3L6USVR5Ku94lMhMppSYqpQ4ppdaZn0stx55SSu1SSm1XSg21pA8z03YppZ4syf2LWNiz\nditBEITKRmloBm9prf9jTVBKdQRGAp2ARsB8pVRb8/B7wMVAPLBSKTVLa72lFMpRCCIMBEEQvFFW\nZqKrgOla60xgr1JqF+CYOrdLa70HQCk13cxb9sJANANBEASvlIYwGKeUuhVYBTyitT4FNAaWWfLE\nm2kAB93S+9pdVCk1Fhhr7qYqpbaXoIx1eCH8RAnOr6zUAaTe/oPU27/wpd7Nfb1YocJAKTUfaGBz\naALwAfAioM3vN4HbsbfJaOzHKGx9yrTWU4BSidWqlFrl65TsqoTU27+QevsXpV3vQoWB1nqwLxdS\nSn0MOJbfiQeaWg43AQ6b297SBUEQhHKipN5EDS27w4FN5vYsYKRSKlQp1RJoA6wAVgJtlFItlVIh\nGIPMs0pSBkEQBKHklHTM4A2lVDcMU88+4G4ArfVmpdS3GAPDOcD9WutcAKXUOGAuEAh8qrXeXMIy\n+ELZLA1U8ZF6+xdSb/+iVOtdKUJYC4IgCGWLH8UmEgRBELwhwkAQBEGo2sKg3EJflBFKqU+VUglK\nqU2WtGil1Dyl1E7zu5aZrpRSk826b1BK9bCcM9rMv1MpNbo86lIUlFJNlVILlVJblVKblVL/MtOr\ndN2VUmFKqRVKqfVmvV8w01sqpZabdZhhOmNgOmzMMOu9XCnVwnIt2/AwFRmlVKBSaq1Sara5X+Xr\nrZTap5TaaIb3WWWmnZ3nXGtdJT8YA9S7gRggBFgPdCzvcpWwTgOAH
sAmS9obwJPm9pPA6+b2pcBv\nGHM++gHLzfRoYI/5XcvcrlXedSuk3g2BHuZ2JLAD6FjV626WP8LcDgaWm/X5Fhhppn8I3Gtu3wd8\naG6PBGaY2x3N5z8UaGm+F4HlXT8f6v8w8DUw29yv8vXGcMSp45Z2Vp7zqqwZ9MEMfaG1zgIcoS8q\nLVrrv4BEt+SrgM/N7c+Bqy3pX2iDZUBN0xV4KDBPa52ojdni84BhZV/64qO1PqK1XmNupwBbMWa0\nV+m6m+VPNXeDzY8GLgJmmunu9Xb8HjOBQUophSU8jNZ6L2AND1MhUUo1AS4DPjH3FX5Qby+clee8\nKguDxniGvmjsJW9lpr7W+ggYjSZQz0z3Vv9K/buYJoDuGL3kKl9301SyDkjAeKl3A6e11jlmFmsd\n8utnHk8CalMJ6w28DTwO5Jn7tfGPemvgD6XUamWE5IGz9JxX5fUMvIXE8Be81b/S/i5KqQjge2C8\n1jpZeQ8+WGXqro35Od2UUjWBH4EOdtnM7ypRb6XU5UCC1nq1UirWkWyTtUrV2+R8rfVhpVQ9YJ5S\nalsBeUu13lVZMygoJEZV4pipGjpmhCeY6d7qXyl/F6VUMIYg+Epr/YOZ7Bd1B9BanwbiMGzDNZVS\njo6ctQ759TOP18AwK1a2ep8PXKmU2odh3r0IQ1Oo6vVGa33Y/E7AEP59OEvPeVUWBv4S+mIW4PAW\nGA38bEm/1fQ46AckmSrmXGCIUqqW6ZUwxEyrsJj236nAVq31fy2HqnTdlVJ1TY0ApVQ1YDDGeMlC\n4Fozm3u9Hb/HtcCf2hhR9BYepkKitX5Ka91Ea90C4739U2t9E1W83kqp6kqpSMc2xvO5ibP1nJf3\n6HlZfjBG23dg2FknlHd5SqE+3wBHgGwM6X8Hhm10AbDT/I428yqMhYR2AxuBXpbr3I4xmLYLuK28\n6+VDvS/AUHM3AOvMz6VVve7AOcBas96bgOfM9BiMRm0X8B0QaqaHmfu7zOMxlmtNMH+P7cAl5V23\nIvwGsTi9iap0vc36rTc/mx1t1tl6ziUchSAIglClzUSCIAiCj4gwEARBEEQYCIIgCCIMBEEQBEQY\nCIIgCIgwEARBEBBhIAiCIAD/DyWPT6gtbd4UAAAAAElFTkSuQmCC\n", 217 | "text/plain": [ 218 | "
# %%
from IPython.display import clear_output
from pandas import DataFrame


def moving_average(x, span=100):
    """Return the exponentially weighted moving average of `x` as an array.

    `x` is converted with `np.asarray`; `span` is passed straight to
    pandas' `ewm`.
    """
    series = DataFrame({'x': np.asarray(x)}).x
    return series.ewm(span=span).mean().values


rewards_sarsa = []
rewards_ql = []

for i in range(5000):
    # `play_and_train`, `env` and both agents are defined in earlier cells;
    # whatever `play_and_train` returns is stored as that iteration's reward.
    rewards_sarsa.append(play_and_train(env, agent_sarsa))
    rewards_ql.append(play_and_train(env, agent_ql))
    # Note: agent.epsilon stays constant

    if i % 100 != 0:
        continue

    # Every 100th iteration: replace the previous cell output with fresh
    # statistics (mean over the last 100 rewards) and smoothed reward curves.
    clear_output(True)
    print('EVSARSA mean reward =', np.mean(rewards_sarsa[-100:]))
    print('QLEARNING mean reward =', np.mean(rewards_ql[-100:]))
    plt.title("epsilon = %s" % agent_ql.epsilon)
    plt.plot(moving_average(rewards_sarsa), label='ev_sarsa')
    plt.plot(moving_average(rewards_ql), label='qlearning')
    plt.grid()
    plt.legend()
    plt.ylim(-500, 0)
    plt.show()

# %% [markdown]
# Let's now see what the algorithms learned by visualizing their actions at every state.

# %%
def draw_policy(env, agent):
    """Print the agent's greedy action for every cell of the cliff grid.

    Hard-coded for the cliff-walking layout used in this notebook.

    :param env: must expose `_cliff` (2-D array, truthy where a cell is a
        cliff) and `start_state_index`; states are numbered row-major as
        `row * n_cols + col`.
    :param agent: must expose `get_best_action(state)` returning an index
        into the arrow string '^>v<'.

    Cliff cells print as C, the start state as X, the last grid cell
    (assumed to be the terminal state) as T, and every other cell as an
    arrow.
    """
    n_rows, n_cols = env._cliff.shape
    arrows = '^>v<'
    # Terminal state is taken to be the bottom-right cell (hard-coded).
    last_state = n_rows * n_cols - 1

    for row in range(n_rows):
        for col in range(n_cols):
            state = row * n_cols + col
            if env._cliff[row, col]:
                cell = " C "
            elif state == env.start_state_index:
                cell = " X "
            elif state == last_state:
                cell = " T "
            else:
                # The agent is only queried for ordinary (non-special) cells.
                cell = " %s " % arrows[agent.get_best_action(state)]
            print(cell, end='')
        print()

# %%
print("Q-Learning")
draw_policy(env, agent_ql)

print("SARSA")
draw_policy(env, agent_sarsa)

# Output recorded in the notebook for this cell:
#   Q-Learning
#    v > v > > v > v > v > v
#    > > > > > > > > > > > v
#    > > > > > > > > > > > v
#    X C C C C C C C C C C T
#   SARSA
#    > > > > > > > > > > > v
#    ^ > > ^ > > > > > ^ > v
#    ^ ^ ^ ^ ^ ^ > ^ ^ ^ > v
#    X C C C C C C C C C C T

# %% [markdown]
# ### Submit to Coursera
# %%
import os

from submit import submit_sarsa

# Credentials are read from the environment so that no e-mail/token pair is
# ever stored in the notebook source. COURSERA_EMAIL and COURSERA_TOKEN are
# the variable names chosen here; set both before running this cell
# (a missing variable raises KeyError instead of submitting with bad data).
submit_sarsa(
    rewards_ql,
    rewards_sarsa,
    os.environ["COURSERA_EMAIL"],
    os.environ["COURSERA_TOKEN"],
)

# %% [markdown]
# ### More
#
# Here are some of the things you can do if you feel like it:
#
# * Play with epsilon. See how the learned policies change if you set epsilon to higher/lower values (e.g. 0.75).
# * Expected Value SARSA for softmax policy:
# $$ \pi(a_i|s) = softmax({Q(s,a_i) \over \tau}) = {e ^ {Q(s,a_i)/ \tau} \over {\sum_{a_j} e ^{Q(s,a_j) / \tau }}} $$
# * Implement N-step algorithms and TD($\lambda$): see [Sutton's book](http://incompleteideas.net/book/bookdraft2018jan1.pdf) chapter 7 and chapter 12.
# * Use those algorithms to train on CartPole in previous / next assignment for this week.
355 | ] 356 | } 357 | ], 358 | "metadata": { 359 | "kernelspec": { 360 | "display_name": "Python 3", 361 | "language": "python", 362 | "name": "python3" 363 | }, 364 | "language_info": { 365 | "codemirror_mode": { 366 | "name": "ipython", 367 | "version": 3 368 | }, 369 | "file_extension": ".py", 370 | "mimetype": "text/x-python", 371 | "name": "python", 372 | "nbconvert_exporter": "python", 373 | "pygments_lexer": "ipython3", 374 | "version": "3.6.2" 375 | } 376 | }, 377 | "nbformat": 4, 378 | "nbformat_minor": 1 379 | } 380 | -------------------------------------------------------------------------------- /bandits.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from abc import ABCMeta, abstractmethod, abstractproperty\n", 12 | "import enum\n", 13 | "\n", 14 | "import numpy as np\n", 15 | "np.set_printoptions(precision=3)\n", 16 | "np.set_printoptions(suppress=True)\n", 17 | "\n", 18 | "import pandas\n", 19 | "\n", 20 | "from matplotlib import pyplot as plt\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Bernoulli Bandit\n", 29 | "\n", 30 | "We are going to implement several exploration strategies for the simplest problem: the Bernoulli bandit.\n", 31 | "\n", 32 | "The bandit has $K$ actions. Action $k$ produces a reward $r = 1.0$ with probability $0 \\le \\theta_k \\le 1$, which is unknown to the agent but fixed over time. The agent's objective is to minimize the regret over a fixed number $T$ of action selections:\n", 33 | "\n", 34 | "$$\\rho = T\\theta^* - \\sum_{t=1}^T r_t$$\n", 35 | "\n", 36 | "Where $\\theta^* = \\max_k\\{\\theta_k\\}$\n", 37 | "\n", 38 | "**Real-world analogy:**\n", 39 | "\n", 40 | "Clinical trials - we have $K$ pills and $T$ ill patients. 
# After taking a pill, a patient is cured with probability $\theta_k$. The task is to find the most effective pill.
#
# A research paper on clinical trials - https://arxiv.org/pdf/1507.08025.pdf

# %%
class BernoulliBandit:
    """Stationary multi-armed bandit whose arms pay 1.0 or 0.0.

    Arm `k` pays 1.0 with probability `self._probs[k]`; the probabilities
    are drawn once, uniformly from [0, 1), when the bandit is created.
    """

    def __init__(self, n_actions=5):
        """Create a bandit with `n_actions` arms and random success rates."""
        self._probs = np.random.random(n_actions)

    @property
    def action_count(self):
        """Number of arms."""
        return len(self._probs)

    def pull(self, action):
        """Pull arm `action` and return the reward (1.0 or 0.0)."""
        if np.random.random() > self._probs[action]:
            return 0.0
        return 1.0

    def optimal_reward(self):
        """ Used for regret calculation

        Returns the largest success probability among all arms.
        """
        return np.max(self._probs)

    def step(self):
        """ Used in nonstationary version
        """
        pass

    def reset(self):
        """ Used in nonstationary version
        """


# %%
class AbstractAgent(metaclass=ABCMeta):
    """Base class for bandit agents: keeps per-arm success/failure counts."""

    def init_actions(self, n_actions):
        """Reset all statistics for a bandit with `n_actions` arms."""
        self._successes = np.zeros(n_actions)
        self._failures = np.zeros(n_actions)
        self._total_pulls = 0

    @abstractmethod
    def get_action(self):
        """
        Get current best action
        :rtype: int
        """
        pass

    def update(self, action, reward):
        """
        Observe reward from action and update agent's internal parameters
        :type action: int
        :type reward: int
        """
        self._total_pulls += 1
        # Any reward other than exactly 1 is counted as a failure.
        if reward == 1:
            self._successes[action] += 1
        else:
            self._failures[action] += 1

    @property
    def name(self):
        """Human-readable agent name (the class name by default)."""
        return self.__class__.__name__


class RandomAgent(AbstractAgent):
    """Baseline agent that picks an arm uniformly at random."""

    def get_action(self):
        return np.random.randint(0, len(self._successes))


# %% [markdown]
# ### Epsilon-greedy agent
#
# > **for** $t = 1,2,...$ **do**
# >> **for** $k = 1,...,K$ **do**
# >>> $\hat\theta_k \leftarrow \alpha_k / (\alpha_k + \beta_k)$
#
# >> **end for**
#
# >> $x_t \leftarrow argmax_{k}\hat\theta$ with probability $1 - \epsilon$ or random action with probability $\epsilon$
#
# >> Apply $x_t$ and observe $r_t$
#
# >> $(\alpha_{x_t}, \beta_{x_t}) \leftarrow (\alpha_{x_t}, \beta_{x_t}) + (r_t, 1-r_t)$
#
# > **end for**
#
# Implement the algorithm above in the cell below:

# %%
class EpsilonGreedyAgent(AbstractAgent):
    """Explore uniformly with probability epsilon, otherwise act greedily."""

    def __init__(self, epsilon = 0.01):
        """:param epsilon: probability of taking a uniformly random action."""
        self._epsilon = epsilon

    def get_action(self):
        """Return the index of the arm to pull next.

        With probability `epsilon` a uniformly random arm is returned.
        Otherwise the arm with the highest empirical success rate is
        returned; arms that have never been pulled have no defined rate
        (0/0), so the lowest-indexed untried arm is tried first instead of
        dividing by zero.

        :rtype: int
        """
        if np.random.random() < self._epsilon:
            return np.random.randint(len(self._successes))

        pulls = self._successes + self._failures
        untried = np.flatnonzero(pulls == 0)
        if untried.size:
            return int(untried[0])
        # Every arm has at least one pull here, so the division is safe.
        return int(np.argmax(self._successes / pulls))

    @property
    def name(self):
        """Class name followed by the epsilon value in parentheses."""
        return self.__class__.__name__ + "(epsilon={})".format(self._epsilon)


# %% [markdown]
# ### UCB Agent
# Epsilon-greedy strategy has no
# preference for actions. It would be better to select among actions that are uncertain or have potential to be optimal. One can come up with the idea of an index for each action that represents optimality and uncertainty at the same time. One efficient way to do it is to use the UCB1 algorithm:
#
# > **for** $t = 1,2,...$ **do**
# >> **for** $k = 1,...,K$ **do**
# >>> $w_k \leftarrow \alpha_k / (\alpha_k + \beta_k) + \sqrt{2\log t \ / \ (\alpha_k + \beta_k)}$
#
# >> **end for**
#
# >> $x_t \leftarrow argmax_{k}w$
#
# >> Apply $x_t$ and observe $r_t$
#
# >> $(\alpha_{x_t}, \beta_{x_t}) \leftarrow (\alpha_{x_t}, \beta_{x_t}) + (r_t, 1-r_t)$
#
# > **end for**
#
#
# __Note:__ in practice, one can multiply $\sqrt{2\log t \ / \ (\alpha_k + \beta_k)}$ by some tunable parameter to regulate the agent's optimism and willingness to abandon non-promising actions.
#
# More versions and optimality analysis - https://homes.di.unimi.it/~cesabian/Pubblicazioni/ml-02.pdf

# %%
class UCBAgent(AbstractAgent):
    """UCB1 agent: empirical success rate plus an exploration bonus."""

    def get_action(self):
        """Return the arm with the largest UCB1 index.

        The index of arm k is
        `successes[k] / pulls[k] + sqrt(2 * log(total_pulls) / pulls[k])`.
        It is undefined while `pulls[k] == 0` (division by zero, and
        `log(0)` before the very first pull), so every arm is pulled once,
        in index order, before the formula is applied.

        :rtype: int
        """
        pulls = self._successes + self._failures

        untried = np.flatnonzero(pulls == 0)
        if untried.size:
            return int(untried[0])

        # From here on every arm has at least one pull, hence
        # total_pulls >= 1 and both the log and the divisions are finite.
        mean_reward = self._successes / pulls
        bonus = np.sqrt(2 * np.log(self._total_pulls) / pulls)
        return int(np.argmax(mean_reward + bonus))

    @property
    def name(self):
        """Agent name used as the legend label / score key."""
        return self.__class__.__name__


# %% [markdown]
# ### Thompson sampling
#
# The UCB1 algorithm does not take into account the actual distribution of rewards.
# If we know the distribution - we can do much better by using Thompson sampling:
#
# > **for** $t = 1,2,...$ **do**
# >> **for** $k = 1,...,K$ **do**
# >>> Sample $\hat\theta_k \sim beta(\alpha_k, \beta_k)$
#
# >> **end for**
#
# >> $x_t \leftarrow argmax_{k}\hat\theta$
#
# >> Apply $x_t$ and observe $r_t$
#
# >> $(\alpha_{x_t}, \beta_{x_t}) \leftarrow (\alpha_{x_t}, \beta_{x_t}) + (r_t, 1-r_t)$
#
# > **end for**
#
#
# More on Thompson Sampling:
# https://web.stanford.edu/~bvr/pubs/TS_Tutorial.pdf

# %%
class ThompsonSamplingAgent(AbstractAgent):
    """Agent that samples a success rate per arm and plays the best sample."""

    def get_action(self):
        """Draw one Beta(successes + 1, failures + 1) sample per arm and
        return the index of the largest sample."""
        alpha = self._successes + 1
        beta = self._failures + 1
        sampled_rates = np.random.beta(alpha, beta)
        return np.argmax(sampled_rates)

    @property
    def name(self):
        """Agent name used as the legend label / score key."""
        return self.__class__.__name__


# %%
from collections import OrderedDict


def get_regret(env, agents, n_steps=5000, n_trials=50):
    """Run every agent on `env` and return its average cumulative regret.

    :param env: bandit exposing `reset()`, `action_count`,
        `optimal_reward()`, `pull(action)` and `step()`.
    :param agents: agents exposing `name`, `init_actions(n)`,
        `get_action()` and `update(action, reward)`.
    :param n_steps: number of pulls per trial.
    :param n_trials: number of independent trials to average over.
    :returns: OrderedDict mapping `agent.name` to an array of length
        `n_steps` holding the cumulative regret averaged over the trials.
    """
    scores = OrderedDict((agent.name, [0.0] * n_steps) for agent in agents)

    for _ in range(n_trials):
        env.reset()

        # Each trial starts with fresh statistics for every agent.
        for agent in agents:
            agent.init_actions(env.action_count)

        for step in range(n_steps):
            best_possible = env.optimal_reward()

            for agent in agents:
                chosen = agent.get_action()
                payoff = env.pull(chosen)
                agent.update(chosen, payoff)
                # Per-step regret, summed over trials for now.
                scores[agent.name][step] += best_possible - payoff

            env.step()  # change bandit's state if it is nonstationary

    # Turn per-step sums into cumulative regret averaged over the trials.
    for agent in agents:
        scores[agent.name] = np.cumsum(scores[agent.name]) / n_trials

    return scores


def plot_regret(scores):
    """Plot one regret curve per entry of `scores`, labelled by its key."""
    for label in scores:
        plt.plot(scores[label])

    plt.legend(list(scores))

    plt.ylabel("regret")
    plt.xlabel("steps")

    plt.show()

# %%
# (The source of the next code cell is not part of this span; what followed
# here in the notebook dump was that cell's recorded output: NumPy
# RuntimeWarnings on stderr and the start of an inline PNG.)
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzs3Xd8TtcfwPHPSYSEEGKGIETsERI7\nYqRib1WKUq1VSqdVNav0R7e2Ss1WQ2uV2tSmYtaIESMIIQSRiMzn/P64yZM8suV5ngzn/XrllXvP\nPffc75PxfJ9777nnCCkliqIoivI8i+wOQFEURcmZVIJQFEVRUqQShKIoipIilSAURVGUFKkEoSiK\noqRIJQhFURQlRSpBKIqiKClSCUJRFEVJkUoQiqIoSoryZXcAWVGiRAnp5OSU3WEoiqLkKidOnHgg\npSyZXr1cnSCcnJw4fvx4doehKIqSqwghbmSknrrEpCiKoqRIJQhFURQlRSpBKIqiKCnK1fcgUhIT\nE0NgYCCRkZHZHYqi5GrW1tY4OjpiZWWV3aEo2STPJYjAwEAKFy6Mk5MTQojsDkdRciUpJSEhIQQG\nBlKpUqXsDkfJJnnuElNkZCTFixdXyUFRskAIQfHixdWZ+EsuzyUIQCUHRTEC9X+k5MkEoSiKkldt\nOxfEyN9OYI7polWCMAFLS0tcXV31X3PmzMl0G8ePH2fMmDEALFu2jNGjR79wPP7+/nTu3BlnZ2fc\n3Nxo3bo1+/fvf+H20jJt2jTmzZuXbr379+9jZWXFzz//bJI4Hj9+zI8//mhQFhQUROfOnY16nClT\nprBr1y4AWrVqZdIHN5cvX46LiwsuLi4sX748xToPHz6kbdu2uLi40LZtWx49egTAxYsXadq0KQUK\nFDD4/URHR+Pp6UlsbKzJ4lay5nFENG8vP4bThM04TdjMiN9OsvXcXfb7PzD5sVWCMAEbGxtOnz6t\n/5owYUKm23B3d+e7777LciyRkZF06tSJYcOGcfXqVU6cOMH333/PtWvXktU155vEn3/+SZMmTfDx\n8TFJ+ykliK+++oqhQ4ca9TgzZszglVdeMWqbKXn48CHTp0/n6NGj+Pr6Mn36dP2bf1Jz5szBy8sL\nf39/vLy89B9O7O3t+e677/joo48M6ufPnx8vLy9Wr15t8tegZE7osxgazdqF64yd7LoQnGy77/UQ\nk8egEoQZOTk5MX78eBo1akSjRo24cuUKoL1Z1q5dm3r16uHp6QnA3r17U/y0e+PGDby8vKhbty5e\nXl7cvHkTgMGDBzNmzBiaNWtG5cqVWbNmDQArV66kadOmdO3aVd9G7dq1GTx4MKB94h82bBje3t68\n8cYbxMXF8fHHH9OwYUPq1q1r8Al/7ty5+vKpU6fqy2fNmkW1atV45ZVXuHTpEgBXr16lQYMG+jr+\n/v64ubnp1318fPjyyy8JDAzk9u3b+vLFixdTtWpVWrVqxdChQ/VnTvfv36dXr140bNiQhg0bcujQ\nIX38Q4YMoVWrVlSuXFmfVCdMmMDVq1dxdXXl448/BmDt2rW0b98eINXXuXfvXjw9PenRowc1a9Zk\nxIgR6HQ64uLiGDx4MLVr16ZOnTp8/fXX+p97ws86KR8fH+rUqUPt2rUZP368vtzW1pZPPvmEevXq\n0aRJE+7du5ds35Rs376dtm3bYm9vT7FixWjbti3btm1LVu+vv/5i0KBBAAwaNIgNGzYAUKpUKRo2\nbJhil9Xu3buzcuXKDMWhmF5YZAxOEzZTb/oOgsOi9OWvN67AqmFNCJjTif+mePNxu+omjyXPdXNN\navqm8/jdeWLUNmuWLcLULrXSrPPs2TNcXV316xMnTuS1114DoEiRIvj6+rJixQree+89/v77b2bM\nmMH27dspV64cjx8/TrPt0aNH88YbbzBo0CCWLFnCmDFj9G8CQ
UFBHDx4kIsXL9K1a1d69+7N+fPn\nDd6oU3LixAkOHjyIjY0NCxcuxM7OjmPHjhEVFUXz5s3x9vbG398ff39/fH19kVLStWtX9u/fT6FC\nhVi1ahWnTp0iNjaWBg0a4ObmhrOzM3Z2dpw+fRpXV1eWLl2qT0q3bt3i7t27NGrUiD59+rB69Wo+\n+OAD7ty5w8yZMzl58iSFCxemTZs21KtXD4CxY8fy/vvv4+Hhwc2bN2nXrh0XLlwAtMsne/bsISws\njGrVqjFy5EjmzJnDuXPnOH36NADXr1+nWLFiFChQANASUUqvE8DX1xc/Pz8qVqxI+/btWbduHZUq\nVeL27ducO3cOIM3f0507dxg/fjwnTpygWLFieHt7s2HDBrp3787Tp09p0qQJs2bNYty4cSxatIjJ\nkyezcuVK5s6dm6ytKlWqsGbNGm7fvk358uX15Y6OjgaJNcG9e/dwcHAAwMHBgeDg5J88n1e7dm2O\nHTuWbj3FPJrP+cdgfef7nriULmxQZlfQPM+m5OkEkV0SLjGlpF+/fvrv77//PgDNmzdn8ODB9OnT\nh549e6bZ9pEjR1i3bh0AAwcOZNy4cfpt3bt3x8LCgpo1a6b6ybRHjx74+/tTtWpVfTtdu3bFxsYG\ngB07dnDmzBn9p+LQ0FD8/f3ZsWMHO3bsoH79+gCEh4fj7+9PWFgYPXr0oGDBgvq2Erz99tssXbqU\nr776itWrV+Pr6wvAqlWr6NOnDwB9+/blrbfe4oMPPsDX15eWLVtib28PwKuvvsrly5cB2LVrF35+\nfvq2nzx5QlhYGACdOnWiQIECFChQgFKlSqX42oOCgihZMnHwytReZ/78+WnUqBGVK1cGtN/TwYMH\n8fLy4tq1a7z77rt06tRJn0xScuzYMVq1aqU/Xv/+/dm/fz/du3cnf/78+jNDNzc3du7cqa/Tv3//\nVNtM6YaksXoZWVpakj9/fsLCwihcuHD6OyhG9zgimoL587H3UjBPImPJb2nB3o9bUbaoTbbGlacT\nRHqf9LND0n/qhOUFCxZw9OhRNm/ejKura6rJJb32Ej4dQ+IbSq1atQxuSK9fv57jx48bXIsuVKiQ\nwX7ff/897dq1MzjO9u3bmThxIsOHDzco/+abb1J9o+rVqxfTp0+nTZs2uLm5Ubx4cUC7/HLv3j39\nZY07d+7g7++fZq8MnU7HkSNH9IksqaSv29LSMsV7KTY2NgZ9+lN7nXv37k32eoQQFCtWjP/++4/t\n27fzww8/8Mcff7BkyZIUY03rdVhZWenbTxpremcQjo6O7N27V18eGBhIq1atktUvXbo0QUFBODg4\nEBQURKlSpVKNJamoqCisra0zVFcxrtpTtxMeZfg3e2RiG4rbFkhlD/NR9yDMLOFm4OrVq2natCmg\nXa9v3LgxM2bMoESJEty6dSvV/Zs1a8aqVasA7U3Fw8MjzeO9/vrrHDp0iI0bN+rLIiIiUq3frl07\nfvrpJ2JiYgC4fPkyT58+pV27dixZsoTw8HAAbt++TXBwMJ6enqxfv55nz54RFhbGpk2b9G1ZW1vT\nrl07Ro4cyZtvvgnApUuXePr0Kbdv3yYgIICAgAAmTpzIqlWraNSoEfv27ePRo0fExsaydu1afVve\n3t7Mnz9fv55eEi1cuLD+DAOgatWqBAQEpPs6QbvEdP36dXQ6HatXr8bDw4MHDx6g0+no1auX/jJY\naho3bsy+fft48OABcXFx+Pj40LJlyzTj7d+/v0HHhoSvhDOcdu3asWPHDh49esSjR4/YsWNHsuQG\n2hlcQg+n5cuX061btzSPCxASEkLJkiXVkBrZ4IttF5Mlh+GelXNEcoA8fgaRXZ6/B9G+fXt9b5Ko\nqCgaN26MTqfT9+D5+OOP9Z+gvby8qFevHvv27Uux7e+++44hQ4Ywd+5cSpYsydKlS9OMxcbGhr//\n/psPPviA9957j9KlS1O4c
GEmT56cYv23336bgIAAGjRogJSSkiVLsmHDBry9vblw4YI+qdna2vLb\nb7/RoEEDXnvtNVxdXalYsSItWrQwaK9///6sW7dOf0nGx8eHHj16GNTp1asXffv25dNPP2XSpEk0\nbtyYsmXLUrNmTezs7PSve9SoUdStW5fY2Fg8PT1ZsGBBqq+7ePHiNG/enNq1a9OhQwfmzp2Ls7Mz\nV65coUqVKqm+ToCmTZsyYcIEzp49q79hffbsWd588010Oh0As2fPTvXYDg4OzJ49m9atWyOlpGPH\njhl6o06Lvb09n376KQ0bNgS07rUJl+LefvttRowYgbu7OxMmTKBPnz4sXryYChUq8OeffwJw9+5d\n3N3defLkCRYWFnzzzTf4+flRpEgR9uzZQ8eOHbMUn5JxsXE6msz+hwfhiTegd7zvSdXSOe/ynjDH\nwxam4u7uLp/vd37hwgVq1KiRTRGlLWGCoxIlSmR3KGYzb948QkNDmTlzZobqh4eHY2trS2xsLD16\n9GDIkCHJEsqLWr9+PSdOnOCzzz5Ltc7evXuZN28ef//9t1GOmRv07NmT2bNnU61atWTbcvL/U271\nwR+nWXfSsINBwJxOZo1BCHFCSumeXj11BqGYTI8ePbh69Sr//PNP+pXjTZs2jV27dhEZGYm3tzfd\nu3c3ajwhIabvO56bREdH07179xSTg2JcOp3kYUR0suTw70SvbIoofeoMQlGUVKn/pxd3POAhLqUL\nY2djxeGrD/hhzxUOXdE+oIzxcuGDtlWzLTZ1BqEoimJmMXE6XD7Zmmad4oXy8/4rLmaKKGtUglAU\nRcmCmDgd7b7ez7UHT9OtO6dnHfo2qmCGqIxDJQhFUZQXFBYZQ7f5h5Ilh3a1SjPM0xnX8kVxnrQF\ngMufdSB/vtz1ZIFKEIqiKC9gle9NJqw7q18vWtCK395qTO1ydgb1zN1DyZhyVzrLJQICAqhdu7ZB\nWdJhsOfNm0f16tX1A/StWLEC0IaLrlatGq6urtSoUYOFCxcatHHq1CmEEGzfvt1kcf/+++8maVtR\n8pJF+68ZJIf17zTj9BTvZMkht1MJwswWLFjAzp078fX15dy5c+zfv99gaIaVK1dy+vRpDh06xPjx\n44mOjtZv8/HxwcPDw2RDZKsEoSgZM2uLNlBkM+fiBMzpRP0KxbI5ItNQCcLMPv/8c3788UeKFCkC\ngJ2dnX545qTCw8MpVKgQlpaWgDa+z5o1a1i2bBk7duwwGFdo5syZVK9enbZt29KvXz/9mcrVq1dp\n3749bm5utGjRgosXLwKpDw0+YcIEDhw4gKurq344a0VREkXH6nCasBkAx2I2/D60STZHZFp5+x7E\n1glw92z69TKjTB3okPkZ4gD9eEXOzs6p1unfvz8FChTA39+fb775Rp8gDh06RKVKlXB2dqZVq1Zs\n2bKFnj17cvz4cdauXZtsuG2AYcOGsWDBAlxcXDh69CjvvPOO/qG1lIYGnzNnzkv3FLGiZNQ/F+8x\nZFnic1ff9nVNo3beYLIzCCFEeSHEHiHEBSHEeSHE2PjyaUKI20KI0/FfHZPsM1EIcUUIcUkIkXwk\nslwitdFNdTpdukM0r1y5kjNnznDz5k3mzZvHjRs3AO3yUt++fQFtiOyEy0wHDx6kW7du2NjYULhw\nYbp06QJoZyCHDx/m1VdfxdXVleHDhxMUFKQ/TkaGBlcURbPtXJBBcvjtrca4VbTPxojMw5RnELHA\nh1LKk0KIwsAJIcTO+G1fSykNJi4WQtQE+gK1gLLALiFEVSll3AtH8IKf9LOqePHiyaaDfPjwIW5u\nbhQqVIhr167p5xtITcmSJWnQoAFHjx7F0dGRtWvXsnHjRmbNmoWUkpCQEMLCwlIdWlqn01G0aNFU\nRz1NaWhwRVEgJDyKc3ee0LKqNp/H/bAoRvymjd7buJI9q4c3zc7wzMpkZxBSyiAp5cn45TD
gAlAu\njV26AauklFFSyuvAFaCRqeIzJVtbWxwcHNi9ezegJYdt27bh4eHBxIkTGTVqFE+eaDPdPXnyJFlv\nJdCG5D516hTOzs7s2rWLevXqcevWLQICArhx4wa9evViw4YNeHh4sGnTJiIjIwkPD2fzZu36aJEi\nRahUqZJ+NE8pJf/991+acT8/RLai5GWBjyJwmrAZpwmbCY2IYdrG8zhN2IzbZ7sYtMRXv63hrF0A\n1HQo8lIlBzDTPQghhBNQHzgKNAdGCyHeAI6jnWU8Qkse/ybZLZC0E0qOtmLFCkaNGsWHH34IwNSp\nU3F2dmbkyJGEh4fr5we2srLS1wHtHoSNjQ1RUVEMHjwYNzc3Bg8enOIQ2T/99BNbt26la9eu1KtX\nj4oVK+Lu7q4fInvlypWMHDmSzz77jJiYGPr27aufwjMldevWJV++fNSrV4/BgwfrZ7xTlLzk531X\nmb31okFZvRk70t3vpwFpT92bF5l8sD4hhC2wD5glpVwnhCgNPAAkMBNwkFIOEUL8AByRUv4Wv99i\nYIuUcu1z7Q0DhgFUqFDBLeEafYKXcXCxhCGyIyIi8PT0ZOHChenOQ60oGZHX/p/+t+0iP+69mmad\n19zLM71bLaJidNwPj8K5ZCHCo2IpbJ13JlTKEYP1CSGsgLXASinlOgAp5b0k2xcBCV1mAoHySXZ3\nBO4836aUciGwELTRXE0Tee4ybNgw/Pz8iIyMZNCgQSo5KMpzErqmJvCoUoJqZQrzQduqFMxvyZNn\nsdgVNEwA1laW+rK8lBwyw2QJQmjddRYDF6SUXyUpd5BSJnSn6QGci1/eCPwuhPgK7Sa1C+Brqvjy\nEvVwm6Ik9/BpNMFhkXT+7qBB+ZYxLahZtohB2fPJQdGY8gyiOTAQOCuESOhKMwnoJ4RwRbvEFAAM\nB5BSnhdC/AH4ofWAGpWlHkyKoryUomN1VJ2cfMjtFUMaUb1MYUoVsc6GqHInkyUIKeVBIKVO/1vS\n2GcWMMtUMSmKkrc9jYql6ezdBmWv1CjNL4PSvdyupCBvP0mtKEqeFhoRw8OIaBzsrOn03QGu3teG\n3W5XqzTf9auPlYUFFhZpP5yqpE4lCEVRcqUlB68z42+/FLf9PFCdMRiDGqzPyEJCQnB1dcXV1ZUy\nZcpQrlw5XF1dKVq0KDVr1szu8DJNp9MxZswYateuTZ06dWjYsCHXr1836TGdnJx48OABAM2aNcty\ne926daNpU9M94LRs2TLu3EnW4U4xkivBYSw/HGBQtvG/O8mSQ/UyhZn/en2uz+6IYhzqDMLIihcv\nrh/eYtq0adja2vLRRx8REBBA586dszm6zFu9ejV37tzhzJkzWFhYEBgYSKFChcx2/MOHD2dp/8eP\nH3Py5ElsbW25fv06lSpVMlJkiZYtW0bt2rUpW7as0dt+WfX88RAnbz42KJu68TzWVhZExuj0ZRM6\nVGdEy9QHv1SyRp1BmFFcXBxDhw6lVq1aeHt78+zZMwBOnz5NkyZNqFu3Lj169NCP49SqVSvef/99\nPD09qVGjBseOHaNnz564uLgwefJkQJvDoXr16gwaNIi6devSu3dvIiIiAG347po1a1K3bl0++ugj\nAG7cuIGXlxd169bFy8uLmzdvAqkPAR4UFISDgwMWFtqfiqOjI8WKaWPfjxw5End3d2rVqsXUqVP1\nr9PJyYlJkybRtGlT3N3dOXnyJO3atcPZ2ZkFCxYAsHfvXjw9PenRowc1a9ZkxIgR6HSJ//gJbG1t\n9fVbtWpF7969qV69Ov3799ePIbVlyxaqV6+Oh4cHY8aMMUjEa9eupUuXLvTt25dVq1bpy69evUqT\nJk1o2LAhU6ZM0R8HYO7cuTRs2JC6devqX1dAQAA1atRI9vtbs2YNx48fp3///ri6uup/p8qL+fda\nCE4TNidLDgmSJoe3PSqp5GBiefoM4gvfL7j48GL6FTO
hun11xjca/0L7+vv74+Pjw6JFi+jTpw9r\n165lwIABvPHGG3z//fe0bNmSKVOmMH36dL755hsA8ufPz/79+/n222/p1q0bJ06cwN7eHmdnZ/1Q\nGJcuXWLx4sU0b96cIUOG8OOPPzJkyBDWr1/PxYsXEULw+LH2Dzd69GjeeOMNBg0axJIlSxgzZgwb\nNmwAUh4CvE+fPnh4eHDgwAG8vLwYMGAA9evXB2DWrFnY29sTFxeHl5cXZ86coW7dugCUL1+eI0eO\n8P777zN48GAOHTpEZGQktWrVYsSIEQD4+vri5+dHxYoVad++PevWraN3796p/vxOnTrF+fPnKVu2\nLM2bN+fQoUO4u7szfPhw9u/fT6VKlejXr5/BPj4+PkydOpXSpUvTu3dvJk6cCMDYsWMZO3Ys/fr1\n0yctgB07duDv74+vry9SSrp27cr+/fupUKFCqr+/+fPnM2/ePNzd1XXvFxUVG8egJb78e+2hvqx+\nhaKcuvmYrWNbcCU4nHd9TtGprgMV7QvybhsXbPJbZmPELwd1BmFGlSpVwtVVG0Pezc2NgIAAQkND\nefz4MS1btgRg0KBB7N+/X79P165dAahTpw61atXCwcGBAgUKULlyZW7dugVob8bNmzcHYMCAARw8\neJAiRYpgbW3N22+/zbp16yhYsCAAR44c4fXXXwdg4MCBHDyY+BBRSkOAOzo6cunSJWbPno2FhQVe\nXl76QQj/+OMPGjRoQP369Tl//jx+fonXhJPG3bhxYwoXLkzJkiWxtrbWJ6tGjRpRuXJlLC0t6dev\nn0EsKWnUqBGOjo5YWFjg6upKQEAAFy9epHLlyvpLR0kTxL1797hy5QoeHh5UrVqVfPnyce7cOf3P\n4dVXXwXQ/zxASxA7duygfv36NGjQgIsXL+Lv75/q70/JuuAnkVSbvE2fHGqXK8LVzzuy/p3mBMzp\nRA2HInSpV5aAOZ344fUGjGtfXSUHM8nTZxAv+knfVJIOsW1paZmhyxEJ+1hYWBjsb2FhQWxsLJB8\n/gkhBPny5cPX15fdu3ezatUq5s+fr58s6Pm6KcWXdIyuAgUK0KFDBzp06EDp0qXZsGEDlStXZt68\neRw7doxixYoxePBgg1nuXjTujPwsQPv5xcbGpjlU+erVq3n06JE+eTx58oRVq1bx2WefpbqPlJKJ\nEycyfPhwg/KAgIAX+v0pqdvld4+3Vxw3KPvIuyqj27hkU0TK89QZRDazs7OjWLFiHDhwAIBff/1V\nfzaRUTdv3uTIkSNA4rzV4eHhhIaG0rFjR7755hv9jfNmzZrpr8WvXLkSDw+PNNs+efKkvoeOTqfj\nzJkzVKxYkSdPnlCoUCHs7Oy4d+8eW7cmf3I1Pb6+vly/fh2dTsfq1avTjSUl1atX59q1a/pP86tX\nr9Zv8/HxYdu2bQQEBBAQEMCJEyf0r71JkyasXauNA5n03kS7du1YsmQJ4eHhANy+fZvg4OA0Y1DD\npGfeLweuGSSHhk7FCJjTSSWHHCZPn0HkFsuXL2fEiBFERERQuXJlli5dmqn9a9SowfLlyxk+fDgu\nLi6MHDmS0NBQunXrRmRkJFJK/RzT3333HUOGDGHu3LmULFky3WMFBwczdOhQoqKiAO0yz+jRo7G2\ntqZ+/frUqlWLypUr6y9xZUbTpk2ZMGECZ8+e1d+wziwbGxt+/PFH2rdvT4kSJWjUSJtCJCAggJs3\nb9KkSeKcwZUqVaJIkSIcPXqUb775hgEDBvDll1/SqVMn/RDp3t7eXLhwQd8t1tbWlt9++00/9WtK\nBg8ezIgRI7CxseHIkSPY2Nhk+nW8TOJ0ks82XwBgQJMKTO9aG0v1MFvOJKXMtV9ubm7yeX5+fsnK\n8rLr16/LWrVqZXcYmbZnzx7ZqVMno7QVFhYmpZRSp9PJkSNHyq+++irdfZ4+fSp1Op2UUkofHx/Z\ntWtXo8SS15ji/+m
1nw/LiuP/lnO3XTR620rGAMdlBt5j1RmEkustWrSI5cuXEx0dTf369ZPdP0jJ\niRMnGD16NFJKihYtypIlS8wQ6cvnaVQstaZuT3Hbh95VzRyNklkmnzDIlNzd3eXx44Y3ufLaBCeK\nkp2y8v90+/Ezms9J3jECYMOo5riWL5qV0JQsyBETBmUXKWW6PWIURUnbi3541OkklScZDtp87fOO\nLD8SwG//3uDbvvWpXc7OCBEqppbnEoS1tTUhISEUL15cJQlFeUFSSkJCQrC2zvjcCW8vP86uC/eS\nlV/9vCMWFoI3m1fizebGH+pEMZ08lyAcHR0JDAzk/v372R2KouRq1tbWODo6ZqjuuduhyZLDgXGt\nKW9f0BShKWaS5xKElZWVSQZkUxQlZbceRtD5e+0p+J4NyrHT7x4nP22LlaV6zCq3y3MJQlEU07vz\n+BnNnrsB3aSyPV/1cc2miBRTUAlCUZQUxcTpaPf1fq49eJpu3ZKFC7BqmOnm3FCyh0oQiqIYCIuM\n4a3lx/G9/jDNenXK2eFVoxS9Gjiqew15lEoQiqLo+V5/SJ+fjxiUdalXlgkdqvPPxWDqly9K5+8P\n8ueIpjR0ss+mKBVzUQlCUV5y0bE6qk7eyvj21fliW+L8KYObOTGtay39+sAmFQEImNPJ7DEq2UMl\nCEV5iXl9uZer97V7DEmTw/XZHdVzRIpKEIrysgkOi2TYihOcvpXytJ5XP1fJQdGoBKEoL5F52y8x\nf88Vg7K/3/Wgdjk7wqNisS2g3hKUROqvQVHyMCkllSZuSXHbx+2qMaBJRexsrABUcshp7pyCha1g\n9HEoXgXOr4c1byZuH/oPlHMzaQgm+4sQQpQHVgBlAB2wUEr5rRDCHlgNOAEBQB8p5SOhndN+C3QE\nIoDBUsqTpopPUV4GVScnn+lv/uv16Vy3bDZE85J6cAV+7QGhN6F2L+ixEGIiwLpI8rpSghCwdTwc\nXaCVzU9l0NVTv+XeBAHEAh9KKU8KIQoDJ4QQO4HBwG4p5RwhxARgAjAe6AC4xH81Bn6K/64oSiZF\nxcZRbfI2/frxya9gWyAflhZCDYFhDlLCzy3g7lnD8nNrta+sGnkYipt+elaTJQgpZRAQFL8cJoS4\nAJQDugGt4qstB/aiJYhuwIr42Y7+FUIUFUI4xLejKEoG3Qh5Ssu5e/XrG0Y1p4RtgewLKK97fAu+\nqa0tNxgE9pVh19Tk9Tzeh4NfZ7zdtjOhyUj4rBRUagn9fMDKvNPZmuWioxDCCagPHAVKJ7zpSymD\nhBCl4quVA24l2S0wvkwlCEXJoIjoWIPksHpYEzUxjymF3U1MDgAnlxtuf3U51OgKFvFnba0mwcNr\nUMAWHlzWLj29thJW99e2T3nlbZ2xAAAgAElEQVQIjwKguHNiG1MfmfQlpMXkCUIIYQusBd6TUj5J\no/tcShuSzVgihBgGDAOoUKGCscJUlFwtNk7HR3/+x4bTdwCoXqYw297zzOao8hgp4cpuCAuCjaOT\nb283G7ZP1Ja7fKudTTz/fpcvP5Sqri3bOcK0UG054TsYJodsZtIEIYSwQksOK6WU6+KL7yVcOhJC\nOADB8eWBQPkkuzsCd55vU0q5EFgI2pSjJgteUXKBz/7245eD15OVbx7TIhuiyeXiYiA2EgoUTr7t\n9O+wYWTq+44PAJti0PQd0MWBhaXJwjQnU/ZiEsBi4IKU8qskmzYCg4A58d//SlI+WgixCu3mdKi6\n/6AoqXv4NDrF5LDzfU8sLdSDbhny7DFIndar6OvEYUWY+lj79P/gCgQeSzk5lKkLJVyg9xLD8jyS\nHMC0ZxDNgYHAWSHE6fiySWiJ4Q8hxFvATeDV+G1b0Lq4XkHr5vomivKSS+iN1LN+Ob56zZW52y/y\nw56rKdZVw2O8gB8aQ/jd5OXTU7hvY1saPrps+phyEPGiE5PnBO7u7vL48ePZHYaiG
FVMnA6XT5I/\nv5CSErb5OfbJKyoxvIiZpSAuyrBsUhB87pC8ruc4aPOJeeIyAyHECSllKg9YJFKPTipKDnL5Xhje\nX+9Ps07XemV5rWF5mlQuri4lZcSjG1r30EIlYe9s2PeFtpyQHN49CfltoXBpbf3TB1oPo90zoOdC\nsLbLvtizmUoQipIDRMbEUf3TbQZlPRuU435YFJM71eT8nVAOXQmhmXNxerk5ZlOUOdyJ5XB5G1za\nApU8YcB6iHoC39ZNXvfpfe17r8XJew1ZWkHpWvD6atPHnMOpBKEo2WjYiuPs8LtnUPaRd1VGtzF8\nSrZamcL0bKASQ4runIaFLQ3Lru+HmcXT3s/zY6jT23Rx5QEqQShKNjkW8DBZcrgyqwP51FAYGRfx\nMHlysC4Kkc8NZT7yCNy/CEUrgqNpxy/KS1SCUJRs8K7PKTb9pz3ms+gNd4auOM7B8a1VcsiokKva\nOEd/DkosS+iaCtozDTNLQPP3oO10rax0TfPHmcupBKEoZvT88NuVSxSibc3SahpPKbUHzCzTeEu6\nexYubIKLm+HeOcNtSZ9EBu0+wvNlSqapBKEoZjDzbz8WP/dQ22fdazMgfp7nPOPWMVj1urY85pTW\neyi9B8f+WwXrh7/Y8UpWh3f+fbF9lXSpBKEoJpbSjejDE9pQtqh5R+Y0qcs74PdXDctml0tc7r8W\nnDwgXwHtDKBkNe0p5I1jkg9wlxG9l0LN7omD4CkmoRKEopjI0kPXmb7JT7++dWwLHoRH0cKlZDZG\nZQIZeZNf2Sv9dkYcgtUDoPPXULSCNmz2w2tw5g/4zwc6zoOq3saJWckQ9SS1ohhZbJwO72/2c+3+\nU33Z2pFNcaton41RGZH/rtTf8Kc8SvxUHxkKa4eC//a02xt+ABxSeFZBMRn1JLWiZAMpJfWm7+Bp\ndBwA5YracGhCm2yOKhU6HWwaA6d+hcYjtWGoN41N3F69M/RdqS0/vAbf1U+9rfoDodt8wzJrO+j/\nR+L6Ii+4fVxLCCVctGGz7Ssb7/UoRqfOIBTFSA7432fgYl/9unPJQvz6VuOcca9BSvhrFNhXgmv7\ntCeN98x68fa6fg+Xt4NjQ20kVI8PwMraePEqJqXOIBTFjP65eI8hyxI/rCwd3JDW1UulsYeZXN4O\nv/dJXh5wIHHZsaE2pDVAzW7a9JaRobB7uuE+lVpq9wGKltd6JzV4w3RxKzmCShCKkkVSSn1yKFrQ\nCt9Jr5A/Xzb3romLhbv/pZwcEhQqBR/7p7694VuwrDPcPQMjD2vjEykvFZUgFCWLpvx1HoB+jcoz\nu2c23WyNidQ+8Z9bC+GGXWpp/h4Uq6iNWFqzuzbtZUZY28GIA+nXU/IslSAUJQueRMbw6783AJjR\nrXY6tU1gYWu4czL17TW6JA41oSiZpBKEorwgKSV1p+0A4ON21bAy5zhKsdHwWQrPU7SdAcWcoFon\niIuG/AXNF5OS56gEoSgv6K3l2n2HErb5GdW6inkPnjQ51OwOfhtg0h3IXyixPK1xjRQlA9RfkKK8\ngBVHAvjnYjAAB8aZ8TmHpyEwN8mzAyMOQpk65ju+8lJRCUJRMuH50Vh3vO+JTf50BqMzzoG1Hkn+\n2iUtLAvAuGtQwNb0x1ZeWipBKEoG7b0UzOClx/Trr9QoTdXShbPWaGggFCkH0U/h/DrY+C6UrAH3\nLyTWsS0DFvngSWBi2afBWTuuomSAShCKkgGxcTqD5PDniKY0dMrE2EpPH2hzFKwfCTW7gou39nzB\nim7J6yZNDgDhdw3XP7ycicgV5cWpBKEoGVDlk6365Wufd8TCQmR858AT8EuS+xSXNqddX1hqCaR0\nLXh8A87+qZX3/R2qv+QTCylmlaEEIYQYK6X8Nr0yRcmLWs/bq1++PrsjQmQiOeh0hsnheR3mQsVm\nsHU8DFynzZfwvF6/ZPx4imJEGT2DGAQ8nwwGp
1CmKHnK3dBIrj/Qhu32/cQr48nh2WP4IslscaXr\nQL/fwba0lgR2fArBF6DxMG37m+mcVShKNkgzQQgh+gGvA5WEEBuTbCoMhJgyMEXJLo+eRjNzsx/r\nTt7Wl60Z0ZRShdMZrVSng60fw7EUPvEP2ggFk9yz8J5ppGgVxXTSO4M4DAQBJYAvk5SHAWfS2lEI\nsQToDARLKWvHl00DhgL346tNklJuid82EXgLiAPGSCnTmWVEUYwvPCqW+jN3GpTVcCiCe3o3pHVx\nMCOVOv3XGiYHRckl0kwQUsobwA2gqRCiIuAipdwlhLABbNASRWqWAfOBFc+Vfy2lnJe0QAhRE+gL\n1ALKAruEEFWllHGZeTGK8iKWHbrOtE1+FMxvSUR04p/ch22rcif0GR+3q57yjlLCkfhJcnZMNtxm\nbaf1NlJzJCi5WEZvUg8FhgH2gDPgCCwAvFLbR0q5XwjhlME4ugGrpJRRwHUhxBWgEXAkg/srSnIx\nkSAs0hy99OLdJ+zavJoA69m8GjWFY2jJIN2b0U/uwFc1kpcnnXJTUXK5jN6kHoX2hn0UQErpL4R4\n0dlQRgsh3gCOAx9KKR8B5YB/k9QJjC9TlPTdv6x9gq/VA1z7JZbPKp1i9dhJwSw6HMj283cpfns3\nv+XXrp7+WWCGVqFGF7gE2DmCQz3DneNitecUFngYlrf5FJq9q5KDkqdkNEFESSmjEz5RCSHyAS8y\nV+lPwMz4fWei3dcYAqT0US3F9oUQw9DOZqhQocILhKDkGbd8YXHbxHX/7RD5WHt+4K/Rqe6W7/NS\n/Bf9Hpd1dfGzjr+1ZpEPdLHa8oVN2leCycFaz6O4GPi8rDZKKkBFD9X7SMnTMpog9gkhJgE2Qoi2\nwDvApnT2SUZKqZ/JRAixCPg7fjUQKJ+kqiNwJ5U2FgILQZuTOrMxKLlUXCwIAQhY4p04Rebztk0w\nWI0auIUCv3bkhM6FN6PHccZ6KAAL8n9juN+nD7T2j/wA2ycZbvsslZNllRyUPC6jCWICWg+js8Bw\nYAuQ6ad3hBAOUsqg+NUewLn45Y3A70KIr9BuUrsAvik0oeRlf76pjUdUpg7cPauVVWgKN9O5FWVV\nEMacgn8+g1O/6otnxAxkyaLHwO8AnPq0LeceeVNmiRsldA8S958cHJ98gKajICoMIh5Cy/GGI6cm\nGPoPlHPLwgtVlNxBSJn2h3AhhCWwXEo5IFMNC+EDtELrInsPmBq/7op2+SgAGJ6QMIQQn6BdbooF\n3pNSbk3W6HPc3d3l8ePH06um5AYHvoTdMzK3T50+0GuRQdGTG6dp/pM/UVgRjZW+fMuYFtQsWySx\nok6nDX5XoAjYFE39GPf84Kem2nL3BVDnVTXPgpLrCSFOSCnd062XXoKIb2w70EVKGW2M4IxFJYg8\nIDYKfvFKPGPo8i1sGqstNxgEJ5dry5OCYPtEOLEMPrwEhcska+ppVCy1piY+PrPjfU9K2hagWKEM\nzsGsKC8JYyeIn4EGaJeCniaUSym/ykqQWaUSRC4TdAZ+bgFVO8BrvwICvq4J4fG3prp+Dw3egPBg\nrSyDE+FExsTx4Z//sflMkL7s/PR2FCqgPukrSkoymiAy+h90J/7LAm2YDUXJuKM/w9ZxieuXt8LM\nEoZ1XpmuJQcA21LaVwZ1/v4gV4LD9euHJ7RRyUFRjCBD/0VSyummDkTJg2KjUu8BlNQ7R6FUKk8r\npyPgwVN9ctj+nifVyqjPL4piLBl9knoTyZ9LCEV72O1nKWWksQNTcrmIh/C/SonrVgWheBV4a2fi\n8BO6OG24ikzc9L10N4x23+xPVr51bAuVHBTFyDL6n3kNKAn4xK+/htYzqSqwCBho/NCUXCk2CkKu\nwn+/J5aNPg4lXJLXtcjcXM4+vjeZuO5ssvKeDcpRw6FICnsoipIVGU0Q9aWUnknWNwkh9kspPYUQ\n500RmJIL/
dYbrhiOhMq0UKM0XWXSFmJ1iSexfjPasftCMD/vv8r0rrWMcgxFUQxlNEGUFEJUkFLe\nBBBCVEB7vgEgR3V9VbJJwMHkyaHjvJTrZtKvRwL0yWFypxq83UJ7eK1LvbJ0qVfWKMdQFCW5jCaI\nD4GDQoiraOMmVQLeEUIUApabKjgll1jZRxsHCaDtDGg2JvHJZCOYtskPgPmv16dzXZUQFMVcMtqL\naYsQwgWojpYgLia5Mf1N6nsqed7ds4nJoeFQaD7WaE03nb2boFDtz6yCfUGVHBTFzDLai6kg8AFQ\nUUo5VAjhIoSoJqX8O719lTwsJjJx2Ov3zkHR8mnXz4CzgaFcD3nKGJ9TBuU/DWiQ5bYVRcmcjF5i\nWgqcAOIHpSEQ+JPE0ViVl1HCfAulaholOQSHRdJl/kGDsqVvNqRV1ZJpT96jKIpJZDRBOEspXxNC\n9AOQUj4T6j/25bLnc7Cy0S4jFbCFrUmG1R5+IEtNBz+JpNHnu5OV/zfVGzsbqxT2UBTFHDKaIKLj\n56GWAEIIZyDKZFEpOceV3fBbz8T1XdMMt797Mkujm0ZExxokh2IFrTj5aVt1xqAoOUC6/9nxZwoL\ngG1AeSHESqA5MNi0oSlmd+c0hFyBOr219X8+g/1zU6/fcR4Ud87SIT/fckG/vOejVlQqUShL7SmK\nYjzpJggppRRCjAW8gSZovZjGSikfpL2nkqs8ewwLW2rLD/zhzqnE3kkAHf4HjYdr8yNc3Q1l64OT\nR8ptZdCjp9H89u9NAK593hELC3XWoCg5SUavDfwLVJZSqjkW86ovKiYu75uTuJzfFibdTlwvXVP7\nekH3nkTS+Ln7DWO9XFRyUJQcyCKD9VoDR4QQV4UQZ4QQZ4UQZ0wZmGIizx7DNDvt62D8Iyz/rUrc\n3uzdxOVWk2BioNEOHRunS5YcAEa1rmK0YyiKYjwZnTCoYkrlUsobRo8oE9SEQZmk08GMYqlvH7YP\nyrpCzDOtx5KRvbXsGLsvBtPMuTh9G1WgRpnCVCllq25IK4qZGXXCoOxOBIqRrBmcuOzxPhz8OnG9\nUCktOYDRk0NsnI6P15xh98VgAFa+3VglBUXJBdS0Wy8DnQ4eXgO/v7T1ycGQrwBU6wiL22pl7/xr\nkkOfux1K5+8TH35bMMBNJQdFySVUgsjrnobA3MqJ6/X6ackBoHwjow3HnZINp27z3urTieujmuNa\nvqjJjqcoinGpBJGXHf4edkxOXK/eGXosMPlhg0Kf0XT2P/r1Fi4l+PWtxiY/rqIoxqUSRF51/YBh\ncki4rGRiUbFxBsmhXa3S/Dww3XthiqLkQCpB5EVSwvLO2nLryeD5kVHnZ0hLtcnb9MtrRjTF3cne\nLMdVFMX4VILIi06v1L4Xc4KWH5vlkFJK3k9yv+H67I7qZrSi5HIqQeQF59bB0Z+hnw9EP4W/Rmnl\nw/aaLYRKE7foly/ObK+Sg6LkASZLEEKIJUBnIFhKWTu+zB5YDTgBAUAfKeWj+AEBvwU6AhHAYCnl\nSVPFlqcc+Ap2T9eW/1cpsbz5e2CTxkNxRnQm8LF+ed07zbC2sjTLcRVFMa2MDrXxIpYB7Z8rmwDs\nllK6ALvj1wE6AC7xX8OAn0wYV94RcjUxOSRVvjG8Ms0sIZy7HUrX+YcAmNGtFg0qmCcpKYpieiY7\ng5BS7hdCOD1X3A1oFb+8HNgLjI8vXyG1cT/+FUIUFUI4SCmDTBVfnvB9/DSc3X8C19fNfvikD8EV\nym/JG02dzB6DoiimY+57EKUT3vSllEFCiFLx5eWAW0nqBcaXqQSRmq+SjKhq5uQgpTS459Chdhl+\nGuBm1hgURTG9nHKTOqU7mimOIiiEGIZ2GYoKFSqYMqacQ0qY765N5vO8N7eaNZQbIU/Zdu6ufr2e\no51KDoqSR5k7QdxLuHQkhHAAguPLA4Gks947AndSakBKuRBYCNporqYMNkd
4cge+qpHytl6LoWIz\nk4cgpSQqVseolSf1A+4BnPq0LcUK5Tf58RVFyR7mThAbgUHAnPjvfyUpHy2EWAU0BkJf6vsPkU/A\npy/cOJR82ztH4cfGUKll4tSgJnT9wVNaz9ubrLxBhaIqOShKHmfKbq4+aDekSwghAoGpaInhDyHE\nW8BN4NX46lvQurheQevm+qap4srxAk/AL22SlycdVM+EA+wl+Ov0bcauOp2sfG7vurzqXj6FPRRF\nyWtM2YupXyqbvFKoK4FRpool14h+apgchu4BB1ezhxEZE5csOZyY/Aq21vkokE8946AoL4uccpNa\nAfBdmLg85RFYmPIxlZRNXHcGH1+tQ1ldRzuqli7M/3rVVXNGK8pLSCWInOLEctg1TVuedMfsyeFx\nRDSuM3YalK0b2Yx8luZPUoqi5Azqvz8n8F0Em8Zoy+2/gPyFzHr4v07fNkgOQ5pX4vrsjio5KMpL\nTp1BZCcp4bPSEBelrTu1gCYjzBrCuduhBvcb/Ga0o2B+9WehKIo6g8hefwxMTA41usDgv816+EX7\nr+mHyuhQuwwBczqp5KAoip56N8gOUeHaw29RT7R1r6nQ4gOzhrDnYjCztlzQr6unoRVFeZ5KEOam\n08HsconrHu+bPTncfvyMN5cdA2B8++qMbOVs1uMripI7qARhTqd+S5zMB+CtXVC+oVlDWHzwOjP/\n9gOgVbWSKjkoipIqlSDMJToiMTmUqgkjDpm9K+sPe64wd/slALyql2LxYPMmJ0VRcheVIMwhMhTm\nxI88W6wSvHPE7CHsvnBPnxwAFr7hbvYYFEXJXVSCMLXA4/BLwugiAsacMuvhN58JYtTvibO3jmtf\njXdaVTFrDIqi5E4qQZiSLi5JcgCmPARhniErpJTUmrqdiOg4fZl9ofwqOSiKkmEqQZjS7ROJy2YY\ngTUiOpYd5++xYN9VLt4NM9g2//X6dK5b1uQxKIqSd6gEYUpbx2nfP0phJjgjk1JSc8r2ZOUXZ7bH\n2kqNwKooSuapBGEKUsI8F3h6Hxwbgm1Jkx8y6RzRAF3qlWVOzzoqOSiK8sJUgjCFxW215ADQc5FJ\nDxUSHsX6U7f165c/60D+fGoEFUVRsk4lCGN7FACB2lPKvLER7CuZ7FC/HLjGZ5sTh8v4yLuqSg6K\nohiNShDGFBsN39bTlkcegdI1TXaoqNg4g+QwunUVRrdxMdnxFEV5+agEYUxzEuZqFiZLDgf9H/Dr\nvwFsP38PgKldavJmc9OdpSiK8vJSCcIYIh7C/5K8SU99ZPRD3HoYQYv/7UlW3se9fAq1FUVRsk4l\niKyKDDVMDh/5m+RhuJSSg5rcR1EUU1LvLln15+DE5cGbwbaU0Q/xrk/i8BwHxrWmvH1Box9DURTl\neSpBZEVcDFyN/2Q/9bHRzxxeXXCYYwGJl6t2feCpkoOiKGajEkRWzCyhfe+5yGjJQaeT9PjxEP8F\nGg7NMf/1+lQpVdgox1AURckIlSBe1Ll1ict1XjVKk6ERMbT5ci8hT6P1ZRM6VGe4Z2WEmQb5UxRF\nSaASxIta86b2fcRBo5w9HPR/wIDFR/Xr773iwrttXLC0UIlBUZTskS0JQggRAIQBcUCslNJdCGEP\nrAacgACgj5TS+P1FjWGaXeJymTpZamqX3z0c7W0MksORiW1wsLPJUruKoihZlZ1nEK2llA+SrE8A\ndksp5wghJsSvj8+e0NKwe0bicu8lWWqqzZd7uXb/qX69ZdWSLHuzobqcpChKjpCTLjF1A1rFLy8H\n9pLTEkTIVTjwpbY8cAM4t37hppwmbE5WtnxIoxduT1EUxdiyK0FIYIcQQgI/SykXAqWllEEAUsog\nIYTxHyjIqu8baN9dB2QpOXh88Y9++cw0b6QEOxurrEanKIpiVNmVIJpLKe/EJ4GdQoiLGd1RCDEM\nGAZQoUIFU8WX3KVticvd5r9QE+FRsdS
emjipz76PW1HEWiUGRVFypmwZG1pKeSf+ezCwHmgE3BNC\nOADEfw9OZd+FUkp3KaV7yZKmn4gHgFu+4POatvz2Py/Uayk4LNIgOWx/z5OKxQsZK0JFURSjM3uC\nEEIUEkIUTlgGvIFzwEZgUHy1QcBf5o4tVSu6a99L1QRHt0zv/vmWCzSatVu/vvejVlQrox56UxQl\nZ8uOS0ylgfXxPXXyAb9LKbcJIY4Bfwgh3gJuAsZ5+iyr7vlBzFOo+xr0XJjp3SeuO4OP7y39+tax\nLXAqoc4cFEXJ+cyeIKSU14B6KZSHAF7mjidd2+I7UnlNzfSuR66G6JPD+neaUb9CMWNGpiiKYlJq\nfsq0SAnX92vLduUyvXu/Rf8CsGJII5UcFEXJdVSCSMv0otp3t8GZ3vXWwwj9smdVM91MVxRFMaKc\n9KBczpJ0OI1WkzK16+uL/uXw1RAA1o5sZsyoFEXJAiklOqnD0sIyWbkawSA5lSBScuq3xOUPLkDh\n0hnaLTgs0qC3kpWlwK2iurSkmF6cLo4YXQzW+ayzO5QcQUpJ1w1dCXgSwI5eO4jVxdJxfcdk9X7w\n+oHtAdvZeHWjvmxey3m0c2pnznBzLCGlzO4YXpi7u7s8fvy48RueVVbruTTuOhS0z9Aur/18hKPX\nH+rX29YszYIBbmo01mwUGhXKkaAjtHdqn92hmJSUkparW/Io6hGty7fm29bfvnSfhv1C/HAq4kRB\nq4JExETQ+PfGRmm3StEqrOu6zuDnGRUXxZn7ZyhuU5ySNiW5H3GfikUqJjsrycmEECeklO7p1VNn\nEM97EqQlB89xGU4ON0Ke6pODpYXgyqwOL90/aE4SGBZIh3Ud9Os/nv6Rjd03prGH6cTqYvn25Ld0\nqNSBmsVrGmyLiYth/un5DKk9hHX+6/B/5M/OGzvpV6Mffav1pd3aduSzyMepgYlTzuqkDgthQXh0\nOFsDtuJd0RuPVR767Xtu7aHuiroA+Pb3pc+mPsTqYvm146+UsNEmuJJS8ijqEfbWGfv7zum+Pfkt\nv5z9Jd16bSu2ZVLjSSw/v5ynMU/58/Kf+m0tyrVgStMp/HHpDxadXaQvv/L4iv7n2culF2v916Z7\nnL+6/UXlopUB7Wf9+dHPWXVpFQBn3jiDEAKd1DHxwES6OHfBo5wHMboYrj2+xgq/FfqzGSsLK/b0\n2YNdAbtUj2Vq6gziedsmwr8/wtA9UK5BhnZ51+cUm/67w5YxLahZtohx41HS9MHeD9h5Yyf21vbs\n7bOXnTd28uG+D5PV29BtA85Fnc0W13r/9Uw5PMVsxwP4vePvvL7l9Uztc3bQWRNFkzHPYp9hk89w\naHspJV8c+4InUU/4vMXnrLq4illHZwHwgdsHvFn7TeJ0cQzZPoSTwSdTbfvkwJNYWVix9vJafO/6\n8oXnFxmKad+tfYz+Z/SLvyjg1w6/4lzUmWY+hvcgX6nwCrtu7spUW0vaLcGhkAMd1nWgWIFiPIrS\nZkHY2H0jlewqvVB8GT2DUAkiKZ0O5pSH2EiYEpJu9dCIGOrN2KFfD5jTyXix5GJSSm48uYGTnZNB\nuU7q2HVjF+v813HoziEADvU7xBe+X+BS1IXBtQdzJ/wO7dZq139/8PqBUbtHYW9tT0+Xnvxy9hfK\n2ZbjdvjtDMWxucdmHkY+ZODWgfqyWR6z2HBlA1+2/JJi1sa5P3T18VW+8P2C16q9xnt73zNKm5l1\nsO9B7ArYERgWyPgD4zlz/0yG9/27x9+UtS3L8vPL+fbkt9hb27O03VL9p2CAGF0MlsISC5F+x8fz\nIefp+3dfALb12kY528Qu4pGxkTRc2TDF/dpWbIv/I3+CI4KJiI1IsU5qPnL/iDvhd2hdoTVVilbR\nny0ZwycHPyEkMoRDt7W/2V87/IprKVciYyPZFrCNbs7dEEIQo4vBe403D549SNZGRs8+AKY1nca0\nI9P
SrVe1WFXWds1Ym89TCeJFfF0HQm9C25nQfEyaVfdeCmbw0mP69WGelZnUsYbxYsmlYnWx1P+1\nPgA/v/IzDUo34PtT37PCb4XJjulVwYvdNxM7B9QqXotVnVfp10fsGqH/505qlOsohtYZyl9X/2Lq\n4akMrDmQXTd2MaHRBMbuGZus/qdNPsWttBurLq5i1aVVDKgxgN8u/JasXoLW5VvzP8//YZ3PmosP\nL+Ib5MutsFsEPQ2is3NnHAo5UKt4LV77+zVmNp9pcAnq1pNbONg6YCksefDsAev819HTpSclbEoQ\n9DSIqLioND89xupi2XdrHy3Lt8RSWBIVF8UXx77g8O3DrOy0kjWX1/DD6R/S/dke6XeEE/dO6D9R\nf+j2Ibtu7uK/+//xbv13WXZ+GUvbLaX3pt7ptpVVb9R8I9nfkXdFb2Y2n0lBq4ImP35GtfmjDfef\n3devHx9wnAKWBbj39B6vrHmFxmUaM7bBWOqUrEOsLpaFZxZyNOgoyzssT9bWPzf/Sfa32MGpA03L\nNqWFY4sXToQqQbyIhK6tn4aAZeq3ZyJj4qj+aeLori6lbNn2nmeevSEdExdDPot8ad5XOXznMALB\nsJ3DMtRmSv/smeFe2u9Yr8gAAA2USURBVJ0n0U/40P1Dmjo0JVbG8t3J7xhZb2SKbxa7b+7mvT2m\n/3Tfy6UXI+qNoKBVQYrkz9mXG1O7mVu0QFEeRz3OUtvdq3Rnw5UNqW7f0G0DfiF+dKrcifMPzvPV\nia84fu84XhW8iIiJYFqzaRTJX4SmPk2pVqwaa7quASA8Opx8Fvnwf+RP7RK1c+y9vpP3TvIs9hnN\nyzXPcltLzy1lX+A+Pvf4nLK2ZY0QnUoQmRdyVZvvwWsKtEh+DTuphMl+ytvbcGBcG+McPweQUjLr\n6CxWX1pNtWLVuPToUrI6W3puoZxtOf2lBp3UUW9FspFT9JeHElgIC/a/tp8i+Yvo/6mllJy+f5rq\n9tWxEBbMPjqbtf5rWd91PVWKVTFo7+rjq1S2q2y0N4S/rvzF5EOT06xzqN8hbCxtOHznMF+e+JLr\nodeT1ZnUeBKty7emTKEyRokrO5y9f5aP9n3EX93/0neTvR56na4buurrNC/XnKDwIK6FXgNI81Lf\nD14/4OnoSWhUKB6rPBhYcyAVCldg1tFZLGm3hIZlUr7EpJiPShCZlXD2MGwflHVNtdouv3u8vUI7\n5tXPO5r9rCFOF0dwRDAOtg4pbp9+ZDprLq/Rr58aeAqBFmN63fDqLM/Y/NotyrVgvtf8FBMDJJ5S\nR8ZGcif8DrEylqrFqmaobXOLiInAOp91hq6tR8dF8yT6CQXzFcQmnw1hMWE5/iwhq8btH0clu0qM\nrDcyu0NRjEgliMyIi4WZxbXlaaFpVk04e/jnw5ZULmmb9WNnUFh0WLIeEc+rYV+DCw8vpLrdq4IX\nr1Z9lZ03drLWfy0rO66k/5b+abZ5qN8hCuYriKWw1Hf3S8mm7puS3ZRWFCVnUs9BZMax+H7Pnb9O\ntcq4Nf/xx/FAALxrls50cmi/tj23w28zt+XcDD245f6bO1FxUZk6RtLkMK7hOP537H8G23ff3G1w\nMzel5LCnz55Ub3ydHXSW08Gn9b2CLIQFO3rtIL9lfqP1CFIUJedQZxAAK18F/x0w5RFYJL/UcO52\nKJ2/P6hfv/xZB/Lns8AvxI9SBUtRwqYEUkok0uBShZSSxecW8+3Jb5O16VTEiYAnAcmW01OqYClW\nd15N4fyF6ft3X648vmKwfXOPzVQook3FGhkbyfQj02nv1J6CVgUZsn1Iim1+7P4x10Kv4VbajS7O\nXdKN4fKjyzjaOuaoniOKomTc/9u79xipyjuM498n3AS8AF4qqBGoaEpUdEVZatuoRUQr0hAbIaio\n1dZLk6q9QTRewh+N1aixtfXSamlt0UppqxQKqNS7KFZRrFzWS2EFB
W24RDBC+fWP8+5yWGbZYVmY\n2Znnk0zmnPe85+z7zjvwm/Oec97XXUw746b94NgxMPre7TbVrVrPsNuzIb8vqD2cHww/kh7dOm/z\nMM3kEZMZ/4/x2+3b1Mj+I3n83ceLLtaz5z3L0jVLmfTSJKaOnErnDp2bzbtx80Y2bt5YMU/Hmtnu\n4y6mYq1Zlr03cwG3ITgATPrm0QBMWzqNG1/YOoFQS8FhwkkTGPelrDtn1BGjuHT2pc3mnXTyJGoO\nqmk8Czjx4BOLGiaia8eu2z2Rama2KxwgPng1ez9m+xlOn1r0UeNyw1PSy9ct3yY4XHXcVY0PHN00\n9CbmLJvDHafcwTX/vIYNmzYwecTkbW7NHNJ7SMmHNzAzK4a7mP5yOSyYAj95H7puvdAaEQy6eTbr\nPtvMw5cNYclnMxh00CDOn3E+ALW9a7l/+P3NHNTMrHy5i6lY9a9A/1Mag8PGz//H2XfPZmW3O+jQ\nbyXndP0llz136na7OTiYWaWr7gCxbiV8UgcnXNyYdPqD17O213QarkjM3Xjldru9PO7lPVRAM7PS\nqe4A8Ux6TqBvNl7K00tWs3av6QWz7unhos3MSq16A8SWLTD/gWz54GNZ8tF6vjvzR3TqAX27D+Lx\ncx9iw6YNLF+/nKN6HVXaspqZlUD1BogPF2Tvx43jlZWLuOSJMXTqkSX97uyfA9CtUzcHBzOrWi2P\nUFap3sueb4gTL+OSJ8Y0Jo8+YrSHjTAzo4rPID5ds4rb9u/J1CcubEzzdQYzs63K7gxC0ghJiyXV\nSZqwu/7Oc2+8wNR992lcnzX6aQcHM7OcsgoQkjoAdwNnAgOBsZIG7nivnRcR/LBPNm/s3h3347Xz\nF9BnH49hZGaWV1YBAjgJqIuIdyPic+BhYFRb/5FFy95qXH5x3HN07FBuH4OZWemV2/+MhwDLc+v1\nKa1NzX06u0vphgMvaOtDm5lVjHILEIXm79xmsChJ35E0X9L81atXt+qP1Aw+j5Gf9+HUWgcIM7Pm\nlNtdTPXAYbn1Q4EV+QwRcR9wH2SD9bXmj9QOPI3agae1toxmZlWh3M4gXgEGSOonqTMwBmh5MgQz\nM2tzZXUGERGbJX0PmAV0AB6IiLda2M3MzHaDsgoQABExA5hR6nKYmVW7cutiMjOzMuEAYWZmBTlA\nmJlZQQ4QZmZWkAOEmZkVpIhWPWtWFiStBv7Tyt0PAD5uw+K0B65zdXCdq8Ou1PnwiDiwpUztOkDs\nCknzI2JwqcuxJ7nO1cF1rg57os7uYjIzs4IcIMzMrKBqDhD3lboAJeA6VwfXuTrs9jpX7TUIMzPb\nsWo+gzAzsx2oygAhaYSkxZLqJE0odXlaS9JhkuZKelvSW5K+n9J7SZojaWl675nSJemuVO83JNXk\njjU+5V8qaXyp6lQsSR0kvSZpelrvJ2leKv8jabh4JHVJ63Vpe9/cMSam9MWSzihNTYojqYekqZIW\npfYeWuntLOma9L1eKGmKpL0qrZ0lPSBplaSFubQ2a1dJJ0h6M+1zl6RCk7I1LyKq6kU2jPg7QH+g\nM7AAGFjqcrWyLr2BmrS8D7AEGAj8DJiQ0icAt6Tls4CZZDP31QLzUnov4N303jMt9yx1/Vqo+7XA\nH4Hpaf1PwJi0fA9wRVq+ErgnLY8BHknLA1PbdwH6pe9Eh1LXawf1nQxcmpY7Az0quZ3Jphp+D+ia\na9+LKq2dga8BNcDCXFqbtSvwMjA07TMTOHOnylfqD6gEDTIUmJVbnwhMLHW52qhufwNOBxYDvVNa\nb2BxWr4XGJvLvzhtHwvcm0vfJl+5vchmGnwSOA2Ynr78HwMdm7Yx2dwiQ9Nyx5RPTds9n6/cXsC+\n6T9LNUmv2HZm6/z0vVK7TQfOqMR2Bvo2CRBt0q5p26Jc+jb5inlVYxdTwxevQX1Ka9fSKfXxwDzg\nCxGxEiC9H5SyNVf39vaZ3An8G
NiS1vcH1kTE5rSeL39j3dL2tSl/e6pzf2A18GDqVvu1pO5UcDtH\nxAfAbcAyYCVZu71KZbdzg7Zq10PSctP0olVjgCjUB9eub+WStDfwZ+DqiFi3o6wF0mIH6WVH0tnA\nqoh4NZ9cIGu0sK3d1JnsF3EN8KuIOB74lKzroTntvs6p330UWbdQH6A7cGaBrJXUzi3Z2Truct2r\nMUDUA4fl1g8FVpSoLLtMUiey4PCHiJiWkj+S1Dtt7w2sSunN1b09fSYnA+dIeh94mKyb6U6gh6SG\nGRLz5W+sW9q+H/Bf2led64H6iJiX1qeSBYxKbudhwHsRsToiNgHTgC9T2e3coK3atT4tN00vWjUG\niFeAAeluiM5kF7QeK3GZWiXdkfAb4O2IuD236TGg4U6G8WTXJhrSL0x3Q9QCa9Mp7CxguKSe6Zfb\n8JRWdiJiYkQcGhF9ydruqYgYB8wFzk3Zmta54bM4N+WPlD4m3f3SDxhAdkGv7ETEh8BySUelpK8D\n/6aC25msa6lWUrf0PW+oc8W2c06btGvatl5SbfoML8wdqzilvkBTootCZ5Hd8fMOcF2py7ML9fgK\n2SnjG8Dr6XUWWd/rk8DS9N4r5Rdwd6r3m8Dg3LEuAerS6+JS163I+p/C1ruY+pP9w68DHgW6pPS9\n0npd2t4/t/916bNYzE7e3VGCuh4HzE9t/Veyu1Uqup2Bm4FFwELg92R3IlVUOwNTyK6xbCL7xf/t\ntmxXYHD6/N4BfkGTGx1aevlJajMzK6gau5jMzKwIDhBmZlaQA4SZmRXkAGFmZgU5QJiZWUEOEGat\nJOlqSd1KXQ6z3cW3uZq1Unqae3BEfFzqspjtDj6DMCuCpO6S/i5pQZqf4EayMYLmSpqb8gyX9KKk\nf0l6NI2RhaT3Jd0i6eX0OiKlfysda4GkZ0pXO7PCHCDMijMCWBERgyLiaLLxn1YAp0bEqZIOAK4H\nhkVEDdlTz9fm9l8XESeRPc16Z0q7ATgjIgYB5+ypipgVywHCrDhvAsPSmcBXI2Jtk+21ZJPTPC/p\ndbIxdA7PbZ+Sex+alp8HfivpMrKJrMzKSseWs5hZRCyRdALZWFc/lTS7SRYBcyJibHOHaLocEZdL\nGgJ8A3hd0nER8Ulbl92stXwGYVYESX2ADRHxENlENjXAerKpXgFeAk7OXV/oJunI3CHOy72/mPJ8\nMSLmRcQNZDOg5YdsNis5n0GYFecY4FZJW8hG3ryCrKtopqSV6TrERcAUSV3SPteTjRoM0EXSPLIf\nZQ1nGbdKGkB29vEk2dzJZmXDt7ma7Wa+HdbaK3cxmZlZQT6DMDOzgnwGYWZmBTlAmJlZQQ4QZmZW\nkAOEmZkV5ABhZmYFOUCYmVlB/wcwZlo7Ig6jfwAAAABJRU5ErkJggg==\n", 337 | "text/plain": [ 338 | "
" 339 | ] 340 | }, 341 | "metadata": {}, 342 | "output_type": "display_data" 343 | } 344 | ], 345 | "source": [ 346 | "# Uncomment agents\n", 347 | "agents = [\n", 348 | " EpsilonGreedyAgent(),\n", 349 | " UCBAgent(),\n", 350 | " ThompsonSamplingAgent()\n", 351 | "]\n", 352 | "\n", 353 | "regret = get_regret(BernoulliBandit(), agents, n_steps=10000, n_trials=10)\n", 354 | "plot_regret(regret)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "### Submit to coursera" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 13, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "ename": "AttributeError", 371 | "evalue": "'str' object has no attribute 'name'", 372 | "output_type": "error", 373 | "traceback": [ 374 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 375 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 376 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msubmit\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msubmit_bandits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0msubmit_bandits\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mregret\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"matcha.11@samsung.com\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"lGkeyGx68ATeXZLy\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 377 | "\u001b[0;32m~/work/notebooks/week6_outro/submit.py\u001b[0m in \u001b[0;36msubmit_bandits\u001b[0;34m(scores, email, token)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0magent\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mscores\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m 
\u001b[0;32mif\u001b[0m \u001b[0;34m\"EpsilonGreedyAgent\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0mepsilon_greedy_agent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"UCBAgent\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 378 | "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'name'" 379 | ] 380 | } 381 | ], 382 | "source": [ 383 | "from submit import submit_bandits\n", 384 | "\n", 385 | "submit_bandits(regret, \"matcha.11@samsung.com\", \"lGkeyGx68ATeXZLy\")" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": { 392 | "collapsed": true 393 | }, 394 | "outputs": [], 395 | "source": [] 396 | } 397 | ], 398 | "metadata": { 399 | "kernelspec": { 400 | "display_name": "Python 3", 401 | "language": "python", 402 | "name": "python3" 403 | }, 404 | "language_info": { 405 | "codemirror_mode": { 406 | "name": "ipython", 407 | "version": 3 408 | }, 409 | "file_extension": ".py", 410 | "mimetype": "text/x-python", 411 | "name": "python", 412 | "nbconvert_exporter": "python", 413 | "pygments_lexer": "ipython3", 414 | "version": "3.6.2" 415 | } 416 | }, 417 | "nbformat": 4, 418 | "nbformat_minor": 2 419 | } 420 | -------------------------------------------------------------------------------- /practice_mcts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "if type(os.environ.get(\"DISPLAY\")) is not str or len(os.environ.get(\"DISPLAY\"))==0:\n", 13 | " !bash ../xvfb start\n", 14 | " %env DISPLAY=:1" 15 | ] 
16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import gym\n", 26 | "import numpy as np\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Seminar: Monte Carlo tree search\n", 36 | "\n", 37 | "In this seminar, we'll implement vanilla MCTS planning and use it to solve some Gym envs.\n", 38 | "\n", 39 | "But before we do that, we first need to modify gym env to allow saving and loading game states to facilitate backtracking." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "from gym.core import Wrapper\n", 51 | "from pickle import dumps,loads\n", 52 | "from collections import namedtuple\n", 53 | "\n", 54 | "#a container for get_result function below. Works just like tuple, but prettier\n", 55 | "ActionResult = namedtuple(\"action_result\",(\"snapshot\",\"observation\",\"reward\",\"is_done\",\"info\"))\n", 56 | "\n", 57 | "\n", 58 | "class WithSnapshots(Wrapper):\n", 59 | " \"\"\"\n", 60 | " Creates a wrapper that supports saving and loading environment states.\n", 61 | " Required for planning algorithms.\n", 62 | "\n", 63 | " This class will have access to the core environment as self.env, e.g.:\n", 64 | " - self.env.reset() #reset original env\n", 65 | " - self.env.ale.cloneState() #make snapshot for atari. 
load with .restoreState()\n", 66 | " - ...\n", 67 | "\n", 68 | " You can also use reset, step and render directly for convenience.\n", 69 | " - s, r, done, _ = self.step(action) #step, same as self.env.step(action)\n", 70 | " - self.render(close=True) #close window, same as self.env.render(close=True)\n", 71 | " \"\"\"\n", 72 | "\n", 73 | " \n", 74 | " def get_snapshot(self):\n", 75 | " \"\"\"\n", 76 | " :returns: environment state that can be loaded with load_snapshot \n", 77 | " Snapshots guarantee same env behaviour each time they are loaded.\n", 78 | " \n", 79 | " Warning! Snapshots can be arbitrary things (strings, integers, json, tuples)\n", 80 | " Don't count on them being pickle strings when implementing MCTS.\n", 81 | " \n", 82 | " Developer Note: Make sure the object you return will not be affected by \n", 83 | " anything that happens to the environment after it's saved.\n", 84 | " You shouldn't, for example, return self.env. \n", 85 | " In case of doubt, use pickle.dumps or deepcopy.\n", 86 | " \n", 87 | " \"\"\"\n", 88 | " #self.render() #close popup windows since we can't pickle them\n", 89 | " if self.unwrapped.viewer is not None:\n", 90 | " self.unwrapped.viewer.close()\n", 91 | " self.unwrapped.viewer = None\n", 92 | " return dumps(self.env)\n", 93 | " \n", 94 | " def load_snapshot(self,snapshot):\n", 95 | " \"\"\"\n", 96 | " Loads snapshot as current env state.\n", 97 | " Should not change snapshot inplace (in case of doubt, deepcopy).\n", 98 | " \"\"\"\n", 99 | " \n", 100 | " assert not hasattr(self,\"_monitor\") or hasattr(self.env,\"_monitor\"), \"can't backtrack while recording\"\n", 101 | "\n", 102 | " #self.close() #close popup windows since we can't load into them\n", 103 | " self.env = loads(snapshot)\n", 104 | " \n", 105 | " def get_result(self,snapshot,action):\n", 106 | " \"\"\"\n", 107 | " A convenience function that \n", 108 | " - loads snapshot, \n", 109 | " - commits action via self.step,\n", 110 | " - and takes snapshot again :)\n", 
111 | " \n", 112 | " :returns: next snapshot, next_observation, reward, is_done, info\n", 113 | " \n", 114 | " Basically it returns next snapshot and everything that env.step would have returned.\n", 115 | " \"\"\"\n", 116 | " \n", 117 | " self.load_snapshot(snapshot)\n", 118 | " s, r, done, info = self.step(action)\n", 119 | " next_snapshot = self.get_snapshot()\n", 120 | " \n", 121 | " return ActionResult(next_snapshot, #fill in the variables\n", 122 | " s, \n", 123 | " r, done, info)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### try out snapshots:\n" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 4, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "\u001b[33mWARN: gym.spaces.Box autodetected dtype as . Please provide explicit dtype.\u001b[0m\n", 143 | "\u001b[33mWARN: doesn't implement 'reset' method, which is required for wrappers derived directly from Wrapper. 
Deprecated default implementation is used.\u001b[0m\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "#make env\n", 149 | "env = WithSnapshots(gym.make(\"CartPole-v0\"))\n", 150 | "env.reset()\n", 151 | "\n", 152 | "n_actions = env.action_space.n" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 5, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "initial_state:\n" 165 | ] 166 | }, 167 | { 168 | "data": { 169 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD8CAYAAAB9y7/cAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAEqVJREFUeJzt3XGs3eV93/H3p5hAlmQ1hAvybDOT\nxltDp8XQO+KIaaKQtsCqmkrNBJsaFCFdJhEpUaOt0EkrkYbUSmvYonUobqFxpiyEkaRYiDVlDlGV\nPwKxE8excSg3iRPf2sNmAZIsGpvJd3/c54ZTc3zv8T33+vo+eb+ko/P7Pb/n/O73CSef+7vP+T0+\nqSokSf35mZUuQJK0PAx4SeqUAS9JnTLgJalTBrwkdcqAl6ROLVvAJ7k+yTNJppPcuVw/R5I0XJbj\nPvgk5wB/BfwyMAN8Gbilqp5e8h8mSRpqua7grwKmq+pbVfV/gQeBbcv0syRJQ6xZpvOuBw4P7M8A\n7zhV54suuqg2bdq0TKVI0upz6NAhnn/++YxzjuUK+GFF/Y25oCRTwBTApZdeyu7du5epFElafSYn\nJ8c+x3JN0cwAGwf2NwBHBjtU1faqmqyqyYmJiWUqQ5J+ei1XwH8Z2JzksiSvA24Gdi7Tz5IkDbEs\nUzRVdSLJ+4DPAecAD1TVgeX4WZKk4ZZrDp6qegx4bLnOL0manytZJalTBrwkdcqAl6ROGfCS1CkD\nXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAl\nqVMGvCR1aqyv7EtyCPgB8Apwoqomk1wIfArYBBwC/llVvTBemZKk07UUV/C/VFVbqmqy7d8J7Kqq\nzcCuti9JOsOWY4pmG7Cjbe8AblqGnyFJWsC4AV/AXyTZk2SqtV1SVUcB2vPFY/4MSdIijDUHD1xd\nVUeSXAw8nuQbo76w/UKYArj00kvHLEOSdLKxruCr6kh7PgZ8FrgKeC7JOoD2fOwUr91eVZNVNTkx\nMTFOGZKkIRYd8EnekORNc9vArwD7gZ3Ara3brcAj4xYpSTp940zRXAJ8Nsncef5rVf15ki8DDyW5\nDfgu8O7xy5Qkna5FB3xVfQt4+5D2/wVcN05RkqTxuZJVkjplwEtSpwx4SeqUAS9JnTLgJalTBrwk\ndcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1Kn\nDHhJ6tSCAZ/kgSTHkuwfaLswyeNJnm3PF7T2JPlIkukk+5JcuZzFS5JObZQr+I8B15/Udiewq6o2\nA7vaPsANwOb2mALuW5oyJUmna8GAr6q/BL53UvM2YEfb3gHcNND+8Zr1JWB
tknVLVawkaXSLnYO/\npKqOArTni1v7euDwQL+Z1vYaSaaS7E6y+/jx44ssQ5J0Kkv9IWuGtNWwjlW1vaomq2pyYmJiicuQ\nJC024J+bm3ppz8da+wywcaDfBuDI4suTJC3WYgN+J3Br274VeGSg/T3tbpqtwEtzUzmSpDNrzUId\nknwSuAa4KMkM8HvA7wMPJbkN+C7w7tb9MeBGYBr4EfDeZahZkjSCBQO+qm45xaHrhvQt4I5xi5Ik\njc+VrJLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnq\nlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOrVgwCd5IMmxJPsH2u5O8tdJ9rbHjQPH7koyneSZ\nJL+6XIVLkuY3yhX8x4Drh7TfW1Vb2uMxgCSXAzcDv9Be85+TnLNUxUqSRrdgwFfVXwLfG/F824AH\nq+rlqvo2MA1cNUZ9kqRFGmcO/n1J9rUpnAta23rg8ECfmdb2GkmmkuxOsvv48eNjlCFJGmaxAX8f\n8HPAFuAo8IetPUP61rATVNX2qpqsqsmJiYlFliFJOpVFBXxVPVdVr1TVj4E/5tVpmBlg40DXDcCR\n8UqUJC3GogI+ybqB3d8A5u6w2QncnOS8JJcBm4GnxitRkrQYaxbqkOSTwDXARUlmgN8Drkmyhdnp\nl0PA7QBVdSDJQ8DTwAngjqp6ZXlKlyTNZ8GAr6pbhjTfP0//e4B7xilKkjQ+V7JKUqcMeEnqlAEv\nSZ0y4CWpUwa8JHXKgJekTi14m6T002TP9tuHtv/i1EfPcCXS+LyCl6ROGfCS1CkDXpI6ZcBLUqcM\neEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOrVgwCfZmOSJJAeTHEjy/tZ+YZLHkzzbni9o\n7UnykSTTSfYluXK5ByFJeq1RruBPAB+sqrcBW4E7klwO3AnsqqrNwK62D3ADsLk9poD7lrxq6Qzy\nHxrTarVgwFfV0ar6Stv+AXAQWA9sA3a0bjuAm9r2NuDjNetLwNok65a8cknSvE5rDj7JJuAK4Eng\nkqo6CrO/BICLW7f1wOGBl820tpPPNZVkd5Ldx48fP/3KJUnzGjngk7wR+DTwgar6/nxdh7TVaxqq\ntlfVZFVNTkxMjFqGJGlEIwV8knOZDfdPVNVnWvNzc1Mv7flYa58BNg68fANwZGnKlSSNapS7aALc\nDxysqg8PHNoJ3Nq2bwUeGWh/T7ubZivw0txUjiTpzBnlK/uuBn4L+HqSva3td4HfBx5KchvwXeDd\n7dhjwI3ANPAj4L1LWrEkaSQLBnxVfZHh8+oA1w3pX8AdY9YlSRqTK1klqVMGvCR1yoCXpE4Z8JLU\nKQNekjplwEvNnu23r3QJ0pIy4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAl\nqVMGvCR1yoCXpE4Z8JLUqVG+dHtjkieSHExyIMn7W/vdSf46yd72uHHgNXclmU7yTJJfXc4BSJKG\nG+VLt08AH6yqryR5E7AnyePt2L1V9e8HOye5HLgZ+AXg7wD/I8nfq6pXlrJw6Uz4xamPrnQJ0qIt\neAVfVUer6itt+wfAQWD9PC/ZBjxYVS9X1beBaeCqpShWkjS605qDT7IJuAJ4sjW9L8m+JA8kuaC1\nrQcOD7xshvl/IUiSlsHIAZ/kjcCngQ9U1feB+4CfA7YAR4E/nOs65OU15HxTSXYn2X38+PHTLlyS\nNL+RAj7JucyG+yeq6jMAVfVcVb1SVT8G/phXp2FmgI0DL98AHDn5nFW1vaomq2pyYmJinDFIkoYY\n5S6aAPcDB6vqwwPt6wa6/Qawv23vBG5Ocl6Sy4DNwFNLV7IkaRSj3EVzNfBbwNeT7G1tvwvckmQL\ns9Mvh4DbAarqQJKHgKeZvQPnDu+gkaQ
zb8GAr6ovMnxe/bF5XnMPcM8YdUmSxuRKVknqlAEvSZ0y\n4CWpUwa8JHXKgJekThnwErBn++0rXYK05Ax4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6\nZcCrW0lGfiznOaSVYsBLUqdG+cIP6afCo0enfrL9a+u2r2Al0tLwCl7ib4b7sH1pNTLgpVMw5LXa\njfKl2+cneSrJ15IcSPKh1n5ZkieTPJvkU0le19rPa/vT7fim5R2CtDzuvntypUuQxjLKFfzLwLVV\n9XZgC3B9kq3AHwD3VtVm4AXgttb/NuCFqnorcG/rJ53VTp5zdw5ePRjlS7cL+GHbPbc9CrgW+Oet\nfQdwN3AfsK1tAzwM/KckaeeRzkqTt28HXg31u1esEmnpjHQXTZJzgD3AW4E/Ar4JvFhVJ1qXGWB9\n214PHAaoqhNJXgLeDDx/qvPv2bPH+4i16vke1tlmpICvqleALUnWAp8F3jasW3se9i5/zdV7kilg\nCuDSSy/lO9/5zkgFS6M604HrH6laSpOT438GdFp30VTVi8AXgK3A2iRzvyA2AEfa9gywEaAd/1ng\ne0POtb2qJqtqcmJiYnHVS5JOaZS7aCbalTtJXg+8CzgIPAH8Zut2K/BI297Z9mnHP+/8uySdeaNM\n0awDdrR5+J8BHqqqR5M8DTyY5N8BXwXub/3vB/5Lkmlmr9xvXoa6JUkLGOUumn3AFUPavwVcNaT9\n/wDvXpLqJEmL5kpWSeqUAS9JnTLgJalT/nPB6pY3b+mnnVfwktQpA16SOmXAS1KnDHhJ6pQBL0md\nMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTo3zp9vlJnkrytSQHknyo\ntX8sybeT7G2PLa09ST6SZDrJviRXLvcgJEmvNcq/B/8ycG1V/TDJucAXk/z3duxfVdXDJ/W/Adjc\nHu8A7mvPkqQzaMEr+Jr1w7Z7bnvM900K24CPt9d9CVibZN34pUqSTsdIc/BJzkmyFzgGPF5VT7ZD\n97RpmHuTnNfa1gOHB14+09okSWfQSAFfVa9U1RZgA3BVkn8A3AX8PPCPgAuB32ndM+wUJzckmUqy\nO8nu48ePL6p4SdKpndZdNFX1IvAF4PqqOtqmYV4G/hS4qnWbATYOvGwDcGTIubZX1WRVTU5MTCyq\neEnSqY1yF81EkrVt+/XAu4BvzM2rJwlwE7C/vWQn8J52N81W4KWqOros1UuSTmmUu2jWATuSnMPs\nL4SHqurRJJ9PMsHslMxe4F+2/o8BNwLTwI+A9y592ZKkhSwY8FW1D7hiSPu1p+hfwB3jlyZJGocr\nWSWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNe\nkjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROjRzwSc5J8tUkj7b9y5I8meTZJJ9K8rrWfl7b\nn27HNy1P6ZKk+ZzOFfz7gYMD+38A3FtVm4EXgNta+23AC1X1VuDe1k+SdIaNFPBJNgD/FPiTth/g\nWuDh1mUHcFPb3tb2aceva/0lSWfQmhH7/QfgXwNvavtvBl6sqhNtfwZY37bXA4cBqupEkpda/+cH\nT5hkCphquy8n2b+oEZz9LuKksXei13FBv2NzXKvL300yVVXbF3uCBQM+ya8Bx6pqT5Jr5pqHdK0R\njr3aMFv09vYzdlfV5EgVrzK9jq3XcUG/Y3Ncq0+S3bScXIxRruCvBn49yY3A+cDfZvaKfm2SNe0q\nfgNwpPWfATYCM0nWAD8LfG+xBUqSFmfBOfiququqNlTVJuBm4PNV9S+AJ4DfbN1uBR5p2zvbPu34\n56vqNVfwkqTlNc598L8D/HaSaWbn2O9v7fcDb27tvw3cOcK5Fv0nyCrQ69h6HRf0OzbHtfqMNbZ4\ncS1
JfXIlqyR1asUDPsn1SZ5pK19Hmc45qyR5IMmxwds8k1yY5PG2yvfxJBe09iT5SBvrviRXrlzl\n80uyMckTSQ4mOZDk/a19VY8tyflJnkrytTauD7X2LlZm97riPMmhJF9PsrfdWbLq34sASdYmeTjJ\nN9r/1965lONa0YBPcg7wR8ANwOXALUkuX8maFuFjwPUntd0J7GqrfHfx6ucQNwCb22MKuO8M1bgY\nJ4APVtXbgK3AHe2/zWof28vAtVX1dmALcH2SrfSzMrvnFee/VFVbBm6JXO3vRYD/CPx5Vf088HZm\n/9st3biqasUewDuBzw3s3wXctZI1LXIcm4D9A/vPAOva9jrgmbb9UeCWYf3O9gezd0n9ck9jA/4W\n8BXgHcwulFnT2n/yvgQ+B7yzba9p/bLStZ9iPBtaIFwLPMrsmpRVP65W4yHgopPaVvV7kdlbzr99\n8v/uSzmulZ6i+cmq12ZwRexqdklVHQVozxe39lU53vbn+xXAk3QwtjaNsRc4BjwOfJMRV2YDcyuz\nz0ZzK85/3PZHXnHO2T0umF0s+RdJ9rRV8LD634tvAY4Df9qm1f4kyRtYwnGtdMCPtOq1I6tuvEne\nCHwa+EBVfX++rkPazsqxVdUrVbWF2Sveq4C3DevWnlfFuDKw4nyweUjXVTWuAVdX1ZXMTlPckeSf\nzNN3tYxtDXAlcF9VXQH8b+a/rfy0x7XSAT+36nXO4IrY1ey5JOsA2vOx1r6qxpvkXGbD/RNV9ZnW\n3MXYAKrqReALzH7GsLatvIbhK7M5y1dmz604PwQ8yOw0zU9WnLc+q3FcAFTVkfZ8DPgss7+YV/t7\ncQaYqaon2/7DzAb+ko1rpQP+y8Dm9kn/65hdKbtzhWtaCoOreU9e5fue9mn4VuCluT/FzjZJwuyi\ntYNV9eGBQ6t6bEkmkqxt268H3sXsB1uremV2dbziPMkbkrxpbhv4FWA/q/y9WFX/Ezic5O+3puuA\np1nKcZ0FHzTcCPwVs/Og/2al61lE/Z8EjgL/j9nfsLcxO5e5C3i2PV/Y+obZu4a+CXwdmFzp+ucZ\n1z9m9s+/fcDe9rhxtY8N+IfAV9u49gP/trW/BXgKmAb+G3Beaz+/7U+3429Z6TGMMMZrgEd7GVcb\nw9fa48BcTqz292KrdQuwu70f/wy4YCnH5UpWSerUSk/RSJKWiQEvSZ0y4CWpUwa8JHXKgJekThnw\nktQpA16SOmXAS1Kn/j9FsYgBwJ4YbgAAAABJRU5ErkJggg==\n", 170 | "text/plain": [ 171 | "
" 172 | ] 173 | }, 174 | "metadata": {}, 175 | "output_type": "display_data" 176 | } 177 | ], 178 | "source": [ 179 | "print(\"initial_state:\")\n", 180 | "\n", 181 | "plt.imshow(env.render('rgb_array'))\n", 182 | "\n", 183 | "#create first snapshot\n", 184 | "snap0 = env.get_snapshot()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 6, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | "Whoops! We died!\n", 197 | "final state:\n" 198 | ] 199 | }, 200 | { 201 | "data": { 202 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD8CAYAAAB9y7/cAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAEvFJREFUeJzt3XGs3eV93/H3p0AgS7IawgV5tplJ\n462hU2PYHSFimigkLbBqplIzQasGVUiXqURK1GgrbNKaSENqpTVs0ToUp9A4VRbCSDIsxJoyQlTl\nj0DshDgYh+IkTrm1h80CJFk1NpPv/jjPhVNzfO/xvff4+jx5v6Sj8/s95zm/+33g8Lm/+/x+DydV\nhSSpPz+11gVIkibDgJekThnwktQpA16SOmXAS1KnDHhJ6tTEAj7J1UmeSrI/ya2T+jmSpNEyifvg\nk5wG/AXwbmAe+CpwQ1U9ueo/TJI00qTO4C8F9lfVd6rq/wL3ANsm9LMkSSOcPqHjbgCeGdqfB95x\nvM7nnntubd68eUKlSNL0OXDgAM8991xWcoxJBfyoov7GXFCSOWAO4IILLmDXrl0TKkWSps/s7OyK\njzGpKZp5YNPQ/kbg4HCHqtpeVbNVNTszMzOhMiTpJ9ekAv6rwJYkFyZ5HXA9sHNCP0uSNMJEpmiq\n6miS9wFfAE4D7q6qvZP4WZKk0SY1B09VPQg8OKnjS5IW50pWSeqUAS9JnTLgJalTBrwkdcqAl6RO\nGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQB\nL0mdWtFX9iU5APwQeBk4WlWzSc4BPgNsBg4A/7yqnl9ZmZKkE7UaZ/C/UFVbq2q27d8KPFxVW4CH\n274k6SSbxBTNNmBH294BXDeBnyFJWsJKA76AP0uyO8lcazu/qg4BtOfzVvgzJEnLsKI5eODyqjqY\n5DzgoSTfGveN7RfCHMAFF1ywwjIkScda0Rl8VR1sz4eBzwOXAs8mWQ/Qng8f573bq2q2qmZnZmZW\nUoYkaYRlB3ySNyR508I28IvAE8BO4MbW7Ubg/pUWKUk6cSuZojkf+HySheP8l6r60yRfBe5NchPw\nl8B7Vl6mJOlELTvgq+o7wNtHtP8v4KqVFCVJWjlXskpSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6RO\nGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQB\nL0mdWjLgk9yd5HCSJ4bazknyUJKn2/PZrT1JPppkf5I9SS6ZZPGSpOMb5wz+E8DVx7TdCjxcV
VuA\nh9s+wDXAlvaYA+5cnTIlSSdqyYCvqj8Hvn9M8zZgR9veAVw31P7JGvgKsC7J+tUqVpI0vuXOwZ9f\nVYcA2vN5rX0D8MxQv/nW9hpJ5pLsSrLryJEjyyxDknQ8q32RNSPaalTHqtpeVbNVNTszM7PKZUiS\nlhvwzy5MvbTnw619Htg01G8jcHD55UmSlmu5Ab8TuLFt3wjcP9T+3nY3zWXAiwtTOZKkk+v0pTok\n+TRwBXBuknngd4HfA+5NchPwl8B7WvcHgWuB/cBfA785gZolSWNYMuCr6objvHTViL4F3LLSoiRJ\nK+dKVknqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1\nyoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnVoy4JPcneRwkieG2j6U5K+SPN4e1w69dluS/Ume\nSvJLkypckrS4cc7gPwFcPaL9jqra2h4PAiS5CLge+Ln2nv+c5LTVKlaSNL4lA76q/hz4/pjH2wbc\nU1UvVdV3gf3ApSuoT5K0TCuZg39fkj1tCufs1rYBeGaoz3xre40kc0l2Jdl15MiRFZQhSRpluQF/\nJ/AzwFbgEPAHrT0j+taoA1TV9qqararZmZmZZZYhSTqeZQV8VT1bVS9X1Y+Bj/PqNMw8sGmo60bg\n4MpKlCQtx7ICPsn6od1fARbusNkJXJ/kzCQXAluAx1ZWoiRpOU5fqkOSTwNXAOcmmQd+F7giyVYG\n0y8HgJsBqmpvknuBJ4GjwC1V9fJkSpckLWbJgK+qG0Y037VI/9uB21dSlCRp5VzJKkmdMuAlqVMG\nvCR1yoCXpE4Z8JLUKQNekjplwEtSp5a8D176SbJ7+82vbP/DuY+tYSXSynkGLzEI9uFwX2iTppkB\nL0mdMuAlqVMGvITz7eqTAS9JnTLgpUV4oVXTzICXpE4Z8JLUKQNearzQqt4Y8JLUqSUDPsmmJI8k\n2Zdkb5L3t/ZzkjyU5On2fHZrT5KPJtmfZE+SSyY9CGmSvNCqaTXOGfxR4INV9TbgMuCWJBcBtwIP\nV9UW4OG2D3ANsKU95oA7V71qSdKSlgz4qjpUVV9r2z8E9gEbgG3AjtZtB3Bd294GfLIGvgKsS7J+\n1SuXJC3qhObgk2wGLgYeBc6vqkMw+CUAnNe6bQCeGXrbfGs79lhzSXYl2XXkyJETr1yaAC+0qidj\nB3ySNwKfBT5QVT9YrOuItnpNQ9X2qpqtqtmZmZlxy5AkjWmsgE9yBoNw/1RVfa41P7sw9dKeD7f2\neWDT0Ns3AgdXp1xp8kadxXuhVdNonLtoAtwF7Kuqjwy9tBO4sW3fCNw/1P7edjfNZcCLC1M5kqST\nZ5wz+MuB3wCuTPJ4e1wL/B7w7iRPA+9u+wAPAt8B9gMfB35r9cuWTj7P4jVtlvzKvqr6MqPn1QGu\nGtG/gFtWWJckaYVcySpJnTLgpRG8XVI9MOAlqVMGvHQCvNCqaWLAS1KnDHhJ6pQBLx2HF1o17Qx4\nSeqUAS+dIC+0aloY8JLUKQNekjplwEuL8EKrppkBL0mdMuClZfBCq6aBAS9JnTLgJalTBry0BC+0\naloZ8JLUqXG+dHtTkkeS7EuyN8n7W/uHkvzVMd/TuvCe25LsT/JUkl+a5ACkteKFVp3qlvxOVuAo\n8MGq+lqSNwG7kzzUXrujqv79cOckFwHXAz8H/B3gfyT5e1X18moWLkla3JJn8FV1qKq+1rZ/COwD\nNizylm3APVX1UlV9F9gPXLoaxUqSxndCc/BJNgMXA4+2pvcl2ZPk7iRnt7YNwDNDb5tn8V8I0inP\nC62aRmMHfJI3Ap8FPlBVPwDuBH4G2AocAv5goeuIt9eI480l2ZVk15EjR064cEnS4sYK+CRnMAj3\nT1XV5wCq6tmqermqfgx8nFenYeaBTUNv3wgcPPaYVbW9q
maranZmZmYlY5DWjBdadSob5y6aAHcB\n+6rqI0Pt64e6/QrwRNveCVyf5MwkFwJbgMdWr2RpbThNo2kzzl00lwO/AXwzyeOt7V8DNyTZymD6\n5QBwM0BV7U1yL/AkgztwbvEOGkk6+ZYM+Kr6MqPn1R9c5D23A7evoC5pauzefrNn9zoluZJVkjpl\nwEtSpwx46QQ4FaNpYsBLUqcMeGkVeD+8TkUGvCR1yoCXpE4Z8NIJ8kKrpoUBL60S5+F1qjHgJalT\nBrwkdcqAl6ROGfDSMnihVdPAgJeOkWSsx0reu9gxpNViwEuraNfH5ta6BOkV43zhh6RFPHDo1VD/\n5fXb17AS6W/yDF5ageFwH7UvrSUDXlqm2Zs9W9epbZwv3T4ryWNJvpFkb5IPt/YLkzya5Okkn0ny\nutZ+Ztvf317fPNkhSJJGGecM/iXgyqp6O7AVuDrJZcDvA3dU1RbgeeCm1v8m4PmqeitwR+sndenY\nOXfn4HUqGedLtwv4Uds9oz0KuBL4tda+A/gQcCewrW0D3Af8pyRpx5G6MpimeTXUP7RmlUivNdZd\nNElOA3YDbwX+EPg28EJVHW1d5oENbXsD8AxAVR1N8iLwZuC54x1/9+7d3hOsn0h+7jVJYwV8Vb0M\nbE2yDvg88LZR3drzqE/sa87ek8wBcwAXXHAB3/ve98YqWJq0kxm6/mGr45mdnV3xMU7oLpqqegH4\nEnAZsC7Jwi+IjcDBtj0PbAJor/808P0Rx9peVbNVNTszM7O86iVJxzXOXTQz7cydJK8H3gXsAx4B\nfrV1uxG4v23vbPu017/o/LsknXzjTNGsB3a0efifAu6tqgeSPAnck+TfAV8H7mr97wL+JMl+Bmfu\n10+gbknSEsa5i2YPcPGI9u8Al45o/z/Ae1alOknSsrmSVZI6ZcBLUqcMeEnqlP+7YOkY3vSlXngG\nL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS\n1CkDXpI6Nc6Xbp+V5LEk30iyN8mHW/snknw3yePtsbW1J8lHk+xPsifJJZMehCTptcb5/8G/BFxZ\nVT9Kcgbw5ST/vb32L6vqvmP6XwNsaY93AHe2Z0nSSbTkGXwN/KjtntEei30jwjbgk+19XwHWJVm/\n8lIlSSdirDn4JKcleRw4DDxUVY+2l25v0zB3JDmztW0Anhl6+3xrkySdRGMFfFW9XFVbgY3ApUn+\nAXAb8LPAPwLOAX6ndc+oQxzbkGQuya4ku44cObKs4iVJx3dCd9FU1QvAl4Crq+pQm4Z5Cfhj4NLW\nbR7YNPS2jcDBEcfaXlWzVTU7MzOzrOIlScc3zl00M0nWte3XA+8CvrUwr54kwHXAE+0tO4H3trtp\nLgNerKpDE6leknRc49xFsx7YkeQ0Br8Q7q2qB5J8MckMgymZx4F/0fo/CFwL7Af+GvjN1S9bkrSU\nJQO+qvYAF49ov/I4/Qu4ZeWlSZJWwpWsktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCX\npE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqfGDvgk\npyX5epIH2v6FSR5N8nSSzyR5XWs/s+3vb69vnkzpkqTFnMgZ/PuBfUP7vw/cUVVbgOeBm1r7TcDz\nVfVW4I7WT5J0ko0V8Ek2Av8U+KO2H+BK4L7WZQdwXdve1vZpr1/V+kuSTqLTx+z3H4B/Bbyp7b8Z\neKGqjrb9eWBD294APANQVUeTvNj6Pzd8wCRzwFzbfSnJE8sawanvXI4Zeyd6HRf0OzbHNV3+bpK5\nqtq+3AMsGfBJfhk4XFW7k1yx0Dyia43x2qsNg6K3t5+xq6pmx6p4yvQ6tl7HBf2OzXFNnyS7aDm5\nHOOcwV8O/LMk1wJnA
X+bwRn9uiSnt7P4jcDB1n8e2ATMJzkd+Gng+8stUJK0PEvOwVfVbVW1sao2\nA9cDX6yqXwceAX61dbsRuL9t72z7tNe/WFWvOYOXJE3WSu6D/x3gt5PsZzDHfldrvwt4c2v/beDW\nMY617D9BpkCvY+t1XNDv2BzX9FnR2OLJtST1yZWsktSpNQ/4JFcneaqtfB1nOueUkuTuJIeHb/NM\nck6Sh9oq34eSnN3ak+Sjbax7klyydpUvLsmmJI8k2Zdkb5L3t/apHluSs5I8luQbbVwfbu1drMzu\ndcV5kgNJvpnk8XZnydR/FgGSrEtyX5Jvtf/W3rma41rTgE9yGvCHwDXARcANSS5ay5qW4RPA1ce0\n3Qo83Fb5Psyr1yGuAba0xxxw50mqcTmOAh+sqrcBlwG3tH830z62l4Arq+rtwFbg6iSX0c/K7J5X\nnP9CVW0duiVy2j+LAP8R+NOq+lng7Qz+3a3euKpqzR7AO4EvDO3fBty2ljUtcxybgSeG9p8C1rft\n9cBTbftjwA2j+p3qDwZ3Sb27p7EBfwv4GvAOBgtlTm/tr3wugS8A72zbp7d+WevajzOejS0QrgQe\nYLAmZerH1Wo8AJx7TNtUfxYZ3HL+3WP/ua/muNZ6iuaVVa/N8IrYaXZ+VR0CaM/ntfapHG/78/1i\n4FE6GFubxngcOAw8BHybMVdmAwsrs09FCyvOf9z2x15xzqk9LhgslvyzJLvbKniY/s/iW4AjwB+3\nabU/SvIGVnFcax3wY6167cjUjTfJG4HPAh+oqh8s1nVE2yk5tqp6uaq2MjjjvRR426hu7XkqxpWh\nFefDzSO6TtW4hlxeVZcwmKa4Jck/WaTvtIztdOAS4M6quhj43yx+W/kJj2utA35h1euC4RWx0+zZ\nJOsB2vPh1j5V401yBoNw/1RVfa41dzE2gKp6AfgSg2sM69rKaxi9MptTfGX2worzA8A9DKZpXllx\n3vpM47gAqKqD7fkw8HkGv5in/bM4D8xX1aNt/z4Ggb9q41rrgP8qsKVd6X8dg5WyO9e4ptUwvJr3\n2FW+721Xwy8DXlz4U+xUkyQMFq3tq6qPDL001WNLMpNkXdt+PfAuBhe2pnpldnW84jzJG5K8aWEb\n+EXgCab8s1hV/xN4Jsnfb01XAU+ymuM6BS40XAv8BYN50H+z1vUso/5PA4eA/8fgN+xNDOYyHwae\nbs/ntL5hcNfQt4FvArNrXf8i4/rHDP782wM83h7XTvvYgJ8Hvt7G9QTwb1v7W4DHgP3AfwXObO1n\ntf397fW3rPUYxhjjFcADvYyrjeEb7bF3ISem/bPYat0K7Gqfx/8GnL2a43IlqyR1aq2naCRJE2LA\nS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUqf8PFXGtmG87Lx4AAAAASUVORK5CYII=\n", 203 | "text/plain": [ 204 | "
" 205 | ] 206 | }, 207 | "metadata": {}, 208 | "output_type": "display_data" 209 | } 210 | ], 211 | "source": [ 212 | "#play without making snapshots (faster)\n", 213 | "while True:\n", 214 | " is_done = env.step(env.action_space.sample())[2]\n", 215 | " if is_done: \n", 216 | " print(\"Whoops! We died!\")\n", 217 | " break\n", 218 | " \n", 219 | "print(\"final state:\")\n", 220 | "plt.imshow(env.render('rgb_array'))\n", 221 | "plt.show()\n" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 7, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "\n", 234 | "\n", 235 | "After loading snapshot\n" 236 | ] 237 | }, 238 | { 239 | "data": { 240 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD8CAYAAAB9y7/cAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAEqVJREFUeJzt3XGs3eV93/H3p5hAlmQ1hAvybDOT\nxltDp8XQO+KIaaKQtsCqmkrNBJsaFCFdJhEpUaOt0EkrkYbUSmvYonUobqFxpiyEkaRYiDVlDlGV\nPwKxE8excSg3iRPf2sNmAZIsGpvJd3/c54ZTc3zv8T33+vo+eb+ko/P7Pb/n/O73CSef+7vP+T0+\nqSokSf35mZUuQJK0PAx4SeqUAS9JnTLgJalTBrwkdcqAl6ROLVvAJ7k+yTNJppPcuVw/R5I0XJbj\nPvgk5wB/BfwyMAN8Gbilqp5e8h8mSRpqua7grwKmq+pbVfV/gQeBbcv0syRJQ6xZpvOuBw4P7M8A\n7zhV54suuqg2bdq0TKVI0upz6NAhnn/++YxzjuUK+GFF/Y25oCRTwBTApZdeyu7du5epFElafSYn\nJ8c+x3JN0cwAGwf2NwBHBjtU1faqmqyqyYmJiWUqQ5J+ei1XwH8Z2JzksiSvA24Gdi7Tz5IkDbEs\nUzRVdSLJ+4DPAecAD1TVgeX4WZKk4ZZrDp6qegx4bLnOL0manytZJalTBrwkdcqAl6ROGfCS1CkD\nXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAl\nqVMGvCR1aqyv7EtyCPgB8Apwoqomk1wIfArYBBwC/llVvTBemZKk07UUV/C/VFVbqmqy7d8J7Kqq\nzcCuti9JOsOWY4pmG7Cjbe8AblqGnyFJWsC4AV/AXyTZk2SqtV1SVUcB2vPFY/4MSdIijDUHD1xd\nVUeSXAw8nuQbo76w/UKYArj00kvHLEOSdLKxruCr6kh7PgZ8FrgKeC7JOoD2fOwUr91eVZNVNTkx\nMTFOGZKkIRYd8EnekORNc9vArwD7gZ3Ara3brcAj4xYpSTp940zRXAJ8Nsncef5rVf15ki8DDyW5\nDfgu8O7xy5Qkna5FB3xVfQt4+5D2/wVcN05RkqTxuZJVkjplwEtSpwx4SeqUAS9JnT
LgJalTBrwk\ndcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1Kn\nDHhJ6tSCAZ/kgSTHkuwfaLswyeNJnm3PF7T2JPlIkukk+5JcuZzFS5JObZQr+I8B15/Udiewq6o2\nA7vaPsANwOb2mALuW5oyJUmna8GAr6q/BL53UvM2YEfb3gHcNND+8Zr1JWBtknVLVawkaXSLnYO/\npKqOArTni1v7euDwQL+Z1vYaSaaS7E6y+/jx44ssQ5J0Kkv9IWuGtNWwjlW1vaomq2pyYmJiicuQ\nJC024J+bm3ppz8da+wywcaDfBuDI4suTJC3WYgN+J3Br274VeGSg/T3tbpqtwEtzUzmSpDNrzUId\nknwSuAa4KMkM8HvA7wMPJbkN+C7w7tb9MeBGYBr4EfDeZahZkjSCBQO+qm45xaHrhvQt4I5xi5Ik\njc+VrJLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnq\nlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOrVgwCd5IMmxJPsH2u5O8tdJ9rbHjQPH7koyneSZ\nJL+6XIVLkuY3yhX8x4Drh7TfW1Vb2uMxgCSXAzcDv9Be85+TnLNUxUqSRrdgwFfVXwLfG/F824AH\nq+rlqvo2MA1cNUZ9kqRFGmcO/n1J9rUpnAta23rg8ECfmdb2GkmmkuxOsvv48eNjlCFJGmaxAX8f\n8HPAFuAo8IetPUP61rATVNX2qpqsqsmJiYlFliFJOpVFBXxVPVdVr1TVj4E/5tVpmBlg40DXDcCR\n8UqUJC3GogI+ybqB3d8A5u6w2QncnOS8JJcBm4GnxitRkrQYaxbqkOSTwDXARUlmgN8Drkmyhdnp\nl0PA7QBVdSDJQ8DTwAngjqp6ZXlKlyTNZ8GAr6pbhjTfP0//e4B7xilKkjQ+V7JKUqcMeEnqlAEv\nSZ0y4CWpUwa8JHXKgJekTi14m6T002TP9tuHtv/i1EfPcCXS+LyCl6ROGfCS1CkDXpI6ZcBLUqcM\neEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOrVgwCfZmOSJJAeTHEjy/tZ+YZLHkzzbni9o\n7UnykSTTSfYluXK5ByFJeq1RruBPAB+sqrcBW4E7klwO3AnsqqrNwK62D3ADsLk9poD7lrxq6Qzy\nHxrTarVgwFfV0ar6Stv+AXAQWA9sA3a0bjuAm9r2NuDjNetLwNok65a8cknSvE5rDj7JJuAK4Eng\nkqo6CrO/BICLW7f1wOGBl820tpPPNZVkd5Ldx48fP/3KJUnzGjngk7wR+DTwgar6/nxdh7TVaxqq\ntlfVZFVNTkxMjFqGJGlEIwV8knOZDfdPVNVnWvNzc1Mv7flYa58BNg68fANwZGnKlSSNapS7aALc\nDxysqg8PHNoJ3Nq2bwUeGWh/T7ubZivw0txUjiTpzBnlK/uuBn4L+HqSva3td4HfBx5KchvwXeDd\n7dhjwI3ANPAj4L1LWrEkaSQLBnxVfZHh8+oA1w3pX8AdY9YlSRqTK1klqVMGvCR1yoCXpE4Z8JLU\nKQNekjplwEvNnu23r3QJ0pIy4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAl\nqVMGvCR1yoCXpE4Z8JLUqVG+dHtjkieSHExyIMn7W/vdSf46yd72uHHgNXclmU7yTJJfXc4BSJKG\nG+VLt08AH6yqryR5E7AnyePt2L1V9e8HOye5HLgZ+AXg7wD/I8nfq6pXlrJw6Uz4xamPrnQJ0qIt\neAVfVUer6itt+wfAQWD9PC/ZBjxYVS9X1beBaeCqpShWkjS605qDT7IJuAJ4sjW9L8m+JA8kuaC1\nrQcOD7xshvl/IUiSlsHIAZ/kjcCngQ9U1feB+4
CfA7YAR4E/nOs65OU15HxTSXYn2X38+PHTLlyS\nNL+RAj7JucyG+yeq6jMAVfVcVb1SVT8G/phXp2FmgI0DL98AHDn5nFW1vaomq2pyYmJinDFIkoYY\n5S6aAPcDB6vqwwPt6wa6/Qawv23vBG5Ocl6Sy4DNwFNLV7IkaRSj3EVzNfBbwNeT7G1tvwvckmQL\ns9Mvh4DbAarqQJKHgKeZvQPnDu+gkaQzb8GAr6ovMnxe/bF5XnMPcM8YdUmSxuRKVknqlAEvSZ0y\n4CWpUwa8JHXKgJekThnwErBn++0rXYK05Ax4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6\nZcCrW0lGfiznOaSVYsBLUqdG+cIP6afCo0enfrL9a+u2r2Al0tLwCl7ib4b7sH1pNTLgpVMw5LXa\njfKl2+cneSrJ15IcSPKh1n5ZkieTPJvkU0le19rPa/vT7fim5R2CtDzuvntypUuQxjLKFfzLwLVV\n9XZgC3B9kq3AHwD3VtVm4AXgttb/NuCFqnorcG/rJ53VTp5zdw5ePRjlS7cL+GHbPbc9CrgW+Oet\nfQdwN3AfsK1tAzwM/KckaeeRzkqTt28HXg31u1esEmnpjHQXTZJzgD3AW4E/Ar4JvFhVJ1qXGWB9\n214PHAaoqhNJXgLeDDx/qvPv2bPH+4i16vke1tlmpICvqleALUnWAp8F3jasW3se9i5/zdV7kilg\nCuDSSy/lO9/5zkgFS6M604HrH6laSpOT438GdFp30VTVi8AXgK3A2iRzvyA2AEfa9gywEaAd/1ng\ne0POtb2qJqtqcmJiYnHVS5JOaZS7aCbalTtJXg+8CzgIPAH8Zut2K/BI297Z9mnHP+/8uySdeaNM\n0awDdrR5+J8BHqqqR5M8DTyY5N8BXwXub/3vB/5Lkmlmr9xvXoa6JUkLGOUumn3AFUPavwVcNaT9\n/wDvXpLqJEmL5kpWSeqUAS9JnTLgJalT/nPB6pY3b+mnnVfwktQpA16SOmXAS1KnDHhJ6pQBL0md\nMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTo3zp9vlJnkrytSQHknyo\ntX8sybeT7G2PLa09ST6SZDrJviRXLvcgJEmvNcq/B/8ycG1V/TDJucAXk/z3duxfVdXDJ/W/Adjc\nHu8A7mvPkqQzaMEr+Jr1w7Z7bnvM900K24CPt9d9CVibZN34pUqSTsdIc/BJzkmyFzgGPF5VT7ZD\n97RpmHuTnNfa1gOHB14+09okSWfQSAFfVa9U1RZgA3BVkn8A3AX8PPCPgAuB32ndM+wUJzckmUqy\nO8nu48ePL6p4SdKpndZdNFX1IvAF4PqqOtqmYV4G/hS4qnWbATYOvGwDcGTIubZX1WRVTU5MTCyq\neEnSqY1yF81EkrVt+/XAu4BvzM2rJwlwE7C/vWQn8J52N81W4KWqOros1UuSTmmUu2jWATuSnMPs\nL4SHqurRJJ9PMsHslMxe4F+2/o8BNwLTwI+A9y592ZKkhSwY8FW1D7hiSPu1p+hfwB3jlyZJGocr\nWSWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNe\nkjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROjRzwSc5J8tUkj7b9y5I8meTZJJ9K8rrWfl7b\nn27HNy1P6ZKk+ZzOFfz7gYMD+38A3FtVm4EXgNta+23AC1X1VuDe1k+SdIaNFPBJNgD/FPiTth/g\nWuDh1mUHcFPb3tb2aceva/0lSWfQmhH7/QfgXwNvavtvBl6sqhNtfwZY37bXA4cBqupEkpda/+cH\nT5hkCphquy8n2b+oEZz9LuKksXei13FBv2NzXKvL300yVVXbF3uCBQM+ya8Bx6pqT5Jr5pqHdK0R\njr3aMFv09v
YzdlfV5EgVrzK9jq3XcUG/Y3Ncq0+S3bScXIxRruCvBn49yY3A+cDfZvaKfm2SNe0q\nfgNwpPWfATYCM0nWAD8LfG+xBUqSFmfBOfiququqNlTVJuBm4PNV9S+AJ4DfbN1uBR5p2zvbPu34\n56vqNVfwkqTlNc598L8D/HaSaWbn2O9v7fcDb27tvw3cOcK5Fv0nyCrQ69h6HRf0OzbHtfqMNbZ4\ncS1JfXIlqyR1asUDPsn1SZ5pK19Hmc45qyR5IMmxwds8k1yY5PG2yvfxJBe09iT5SBvrviRXrlzl\n80uyMckTSQ4mOZDk/a19VY8tyflJnkrytTauD7X2LlZm97riPMmhJF9PsrfdWbLq34sASdYmeTjJ\nN9r/1965lONa0YBPcg7wR8ANwOXALUkuX8maFuFjwPUntd0J7GqrfHfx6ucQNwCb22MKuO8M1bgY\nJ4APVtXbgK3AHe2/zWof28vAtVX1dmALcH2SrfSzMrvnFee/VFVbBm6JXO3vRYD/CPx5Vf088HZm\n/9st3biqasUewDuBzw3s3wXctZI1LXIcm4D9A/vPAOva9jrgmbb9UeCWYf3O9gezd0n9ck9jA/4W\n8BXgHcwulFnT2n/yvgQ+B7yzba9p/bLStZ9iPBtaIFwLPMrsmpRVP65W4yHgopPaVvV7kdlbzr99\n8v/uSzmulZ6i+cmq12ZwRexqdklVHQVozxe39lU53vbn+xXAk3QwtjaNsRc4BjwOfJMRV2YDcyuz\nz0ZzK85/3PZHXnHO2T0umF0s+RdJ9rRV8LD634tvAY4Df9qm1f4kyRtYwnGtdMCPtOq1I6tuvEne\nCHwa+EBVfX++rkPazsqxVdUrVbWF2Sveq4C3DevWnlfFuDKw4nyweUjXVTWuAVdX1ZXMTlPckeSf\nzNN3tYxtDXAlcF9VXQH8b+a/rfy0x7XSAT+36nXO4IrY1ey5JOsA2vOx1r6qxpvkXGbD/RNV9ZnW\n3MXYAKrqReALzH7GsLatvIbhK7M5y1dmz604PwQ8yOw0zU9WnLc+q3FcAFTVkfZ8DPgss7+YV/t7\ncQaYqaon2/7DzAb+ko1rpQP+y8Dm9kn/65hdKbtzhWtaCoOreU9e5fue9mn4VuCluT/FzjZJwuyi\ntYNV9eGBQ6t6bEkmkqxt268H3sXsB1uremV2dbziPMkbkrxpbhv4FWA/q/y9WFX/Ezic5O+3puuA\np1nKcZ0FHzTcCPwVs/Og/2al61lE/Z8EjgL/j9nfsLcxO5e5C3i2PV/Y+obZu4a+CXwdmFzp+ucZ\n1z9m9s+/fcDe9rhxtY8N+IfAV9u49gP/trW/BXgKmAb+G3Beaz+/7U+3429Z6TGMMMZrgEd7GVcb\nw9fa48BcTqz292KrdQuwu70f/wy4YCnH5UpWSerUSk/RSJKWiQEvSZ0y4CWpUwa8JHXKgJekThnw\nktQpA16SOmXAS1Kn/j9FsYgBwJ4YbgAAAABJRU5ErkJggg==\n", 241 | "text/plain": [ 242 | "
" 243 | ] 244 | }, 245 | "metadata": {}, 246 | "output_type": "display_data" 247 | } 248 | ], 249 | "source": [ 250 | "#reload initial state\n", 251 | "env.load_snapshot(snap0)\n", 252 | "\n", 253 | "print(\"\\n\\nAfter loading snapshot\")\n", 254 | "plt.imshow(env.render('rgb_array'))\n", 255 | "plt.show()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 8, 261 | "metadata": { 262 | "collapsed": true 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "#get outcome (snapshot, observation, reward, is_done, info)\n", 267 | "res = env.get_result(snap0,env.action_space.sample())\n", 268 | "\n", 269 | "snap1, observation, reward = res[:3]\n", 270 | "\n", 271 | "#second step\n", 272 | "res2 = env.get_result(snap1,env.action_space.sample())" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "# MCTS: Monte-Carlo tree search\n", 280 | "\n", 281 | "In this section, we'll implement the vanilla MCTS algorithm with UCB1-based node selection.\n", 282 | "\n", 283 | "We will start by implementing the `Node` class - a simple class that acts like MCTS node and supports some of the MCTS algorithm steps.\n", 284 | "\n", 285 | "This MCTS implementation makes some assumptions about the environment, you can find those _in the notes section at the end of the notebook_." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 9, 291 | "metadata": { 292 | "collapsed": true 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "assert isinstance(env,WithSnapshots)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 10, 302 | "metadata": { 303 | "collapsed": true 304 | }, 305 | "outputs": [], 306 | "source": [ 307 | "class Node:\n", 308 | " \"\"\" a tree node for MCTS \"\"\"\n", 309 | " \n", 310 | " #metadata:\n", 311 | " parent = None #parent Node\n", 312 | " value_sum = 0. 
#sum of state values from all visits (numerator)\n", 313 | " times_visited = 0 #counter of visits (denominator)\n", 314 | "\n", 315 | " \n", 316 | " def __init__(self,parent,action,):\n", 317 | " \"\"\"\n", 318 | " Creates and empty node with no children.\n", 319 | " Does so by commiting an action and recording outcome.\n", 320 | " \n", 321 | " :param parent: parent Node\n", 322 | " :param action: action to commit from parent Node\n", 323 | " \n", 324 | " \"\"\"\n", 325 | " \n", 326 | " self.parent = parent\n", 327 | " self.action = action \n", 328 | " self.children = set() #set of child nodes\n", 329 | "\n", 330 | " #get action outcome and save it\n", 331 | " res = env.get_result(parent.snapshot,action)\n", 332 | " self.snapshot,self.observation,self.immediate_reward,self.is_done,_ = res\n", 333 | " \n", 334 | " \n", 335 | " def is_leaf(self):\n", 336 | " return len(self.children)==0\n", 337 | " \n", 338 | " def is_root(self):\n", 339 | " return self.parent is None\n", 340 | " \n", 341 | " def get_mean_value(self):\n", 342 | " return self.value_sum / self.times_visited if self.times_visited !=0 else 0\n", 343 | " \n", 344 | " def ucb_score(self,scale=10,max_value=1e100):\n", 345 | " \"\"\"\n", 346 | " Computes ucb1 upper bound using current value and visit counts for node and it's parent.\n", 347 | " \n", 348 | " :param scale: Multiplies upper bound by that. 
From hoeffding inequality, assumes reward range to be [0,scale].\n", 349 | " :param max_value: a value that represents infinity (for unvisited nodes)\n", 350 | " \n", 351 | " \"\"\"\n", 352 | " \n", 353 | " if self.times_visited == 0:\n", 354 | " return max_value\n", 355 | " \n", 356 | " #compute ucb-1 additive component (to be added to mean value)\n", 357 | " #hint: you can use self.parent.times_visited for N times node was considered,\n", 358 | " # and self.times_visited for n times it was visited\n", 359 | " \n", 360 | " U = np.sqrt(2*np.log(self.parent.times_visited)/self.times_visited)\n", 361 | " \n", 362 | " return self.get_mean_value() + scale*U\n", 363 | " \n", 364 | " \n", 365 | " #MCTS steps\n", 366 | " \n", 367 | " def select_best_leaf(self):\n", 368 | " \"\"\"\n", 369 | " Picks the leaf with highest priority to expand\n", 370 | " Does so by recursively picking nodes with best UCB-1 score until it reaches the leaf.\n", 371 | " \n", 372 | " \"\"\"\n", 373 | " if self.is_leaf():\n", 374 | " return self\n", 375 | " \n", 376 | " children = list(self.children)\n", 377 | " \n", 378 | " best_child = children[np.argmax([child.ucb_score() for child in children])]\n", 379 | " \n", 380 | " return best_child.select_best_leaf()\n", 381 | " \n", 382 | " def expand(self):\n", 383 | " \"\"\"\n", 384 | " Expands the current node by creating all possible child nodes.\n", 385 | " Then returns one of those children.\n", 386 | " \"\"\"\n", 387 | " \n", 388 | " assert not self.is_done, \"can't expand from terminal state\"\n", 389 | "\n", 390 | " for action in range(n_actions):\n", 391 | " self.children.add(Node(self,action))\n", 392 | " \n", 393 | " return self.select_best_leaf()\n", 394 | " \n", 395 | " def rollout(self,t_max=10**4):\n", 396 | " \"\"\"\n", 397 | " Play the game from this state to the end (done) or for t_max steps.\n", 398 | " \n", 399 | " On each step, pick action at random (hint: env.action_space.sample()).\n", 400 | " \n", 401 | " Compute sum of rewards 
from current state till the episode ends or t_max steps pass.\n", 402 | " Note 1: use env.action_space.sample() for random action\n", 403 | " Note 2: if node is terminal (self.is_done is True), just return 0\n", 404 | " \n", 405 | " \"\"\"\n", 406 | " \n", 407 | " #set env into the appropriate state\n", 408 | " env.load_snapshot(self.snapshot)\n", 409 | " obs = self.observation\n", 410 | " is_done = self.is_done\n", 411 | " \n", 412 | " rollout_reward = 0\n", 413 | " snapshot = self.snapshot\n", 414 | " while not is_done and t_max > 0:\n", 415 | " snapshot, s, r, is_done, _ =env.get_result(snapshot, env.action_space.sample())\n", 416 | " rollout_reward += r\n", 417 | " t_max -= 1\n", 418 | "\n", 419 | " return rollout_reward\n", 420 | " \n", 421 | " def propagate(self,child_value):\n", 422 | " \"\"\"\n", 423 | " Uses child value (sum of rewards) to update parents recursively.\n", 424 | " \"\"\"\n", 425 | " #compute node value\n", 426 | " my_value = self.immediate_reward + child_value\n", 427 | " \n", 428 | " #update value_sum and times_visited\n", 429 | " self.value_sum+=my_value\n", 430 | " self.times_visited+=1\n", 431 | " \n", 432 | " #propagate upwards\n", 433 | " if not self.is_root():\n", 434 | " self.parent.propagate(my_value)\n", 435 | " \n", 436 | " def safe_delete(self):\n", 437 | " \"\"\"safe delete to prevent memory leak in some python versions\"\"\"\n", 438 | " del self.parent\n", 439 | " for child in self.children:\n", 440 | " child.safe_delete()\n", 441 | " del child" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 11, 447 | "metadata": { 448 | "collapsed": true 449 | }, 450 | "outputs": [], 451 | "source": [ 452 | "class Root(Node):\n", 453 | " def __init__(self,snapshot,observation):\n", 454 | " \"\"\"\n", 455 | " creates special node that acts like tree root\n", 456 | " :snapshot: snapshot (from env.get_snapshot) to start planning from\n", 457 | " :observation: last environment observation\n", 458 | " \"\"\"\n", 459 | " \n", 460 | " self.parent = 
self.action = None\n", 461 | " self.children = set() #set of child nodes\n", 462 | " \n", 463 | " #root: load snapshot and observation\n", 464 | " self.snapshot = snapshot\n", 465 | " self.observation = observation\n", 466 | " self.immediate_reward = 0\n", 467 | " self.is_done=False\n", 468 | " \n", 469 | " @staticmethod\n", 470 | " def from_node(node):\n", 471 | " \"\"\"initializes node as root\"\"\"\n", 472 | " root = Root(node.snapshot,node.observation)\n", 473 | " #copy data\n", 474 | " copied_fields = [\"value_sum\",\"times_visited\",\"children\",\"is_done\"]\n", 475 | " for field in copied_fields:\n", 476 | " setattr(root,field,getattr(node,field))\n", 477 | " return root" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": {}, 483 | "source": [ 484 | "## Main MCTS loop\n", 485 | "\n", 486 | "With all we implemented, MCTS boils down to a trivial piece of code." 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 34, 492 | "metadata": { 493 | "collapsed": true 494 | }, 495 | "outputs": [], 496 | "source": [ 497 | "def plan_mcts(root,n_iters=10):\n", 498 | " \"\"\"\n", 499 | " builds tree with monte-carlo tree search for n_iters iterations\n", 500 | " :param root: tree node to plan from\n", 501 | " :param n_iters: how many select-expand-simulate-propagate loops to make\n", 502 | " \"\"\"\n", 503 | " for _ in range(n_iters):\n", 504 | "\n", 505 | " node = root.select_best_leaf()\n", 506 | "\n", 507 | " if node.is_done:\n", 508 | " node.propagate(0)\n", 509 | "\n", 510 | " else: #node is not terminal\n", 511 | " next_node = node.expand()\n", 512 | " rollout_reward = next_node.rollout()\n", 513 | " next_node.propagate(rollout_reward) #start from the simulated child so its own value_sum/times_visited and immediate_reward are counted" 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": {}, 519 | "source": [ 520 | "## Plan and execute\n", 521 | "In this section, we use the MCTS implementation to find optimal policy." 
522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 35, 527 | "metadata": { 528 | "collapsed": true 529 | }, 530 | "outputs": [], 531 | "source": [ 532 | "root_observation = env.reset()\n", 533 | "root_snapshot = env.get_snapshot()\n", 534 | "root = Root(root_snapshot,root_observation)" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 36, 540 | "metadata": { 541 | "collapsed": true 542 | }, 543 | "outputs": [], 544 | "source": [ 545 | "#plan from root:\n", 546 | "plan_mcts(root,n_iters=1000)" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 37, 552 | "metadata": {}, 553 | "outputs": [ 554 | { 555 | "name": "stdout", 556 | "output_type": "stream", 557 | "text": [ 558 | "Finished with reward = 200.0\n" 559 | ] 560 | } 561 | ], 562 | "source": [ 563 | "from IPython.display import clear_output\n", 564 | "from itertools import count\n", 565 | "from gym.wrappers import Monitor\n", 566 | "\n", 567 | "total_reward = 0 #sum of rewards\n", 568 | "test_env = loads(root_snapshot) #env used to show progress\n", 569 | "\n", 570 | "for i in count():\n", 571 | " \n", 572 | " #get best child\n", 573 | " children = list(root.children)\n", 574 | " best_child = children[np.argmax([child.get_mean_value() for child in children])]\n", 575 | " \n", 576 | " #take action\n", 577 | " s,r,done,_ = test_env.step(best_child.action)\n", 578 | " \n", 579 | " #show image\n", 580 | " #clear_output(True)\n", 581 | " #plt.title(\"step %i\"%i)\n", 582 | " #plt.imshow(test_env.render('rgb_array'))\n", 583 | " #plt.show()\n", 584 | " test_env.close()\n", 585 | "\n", 586 | " total_reward += r\n", 587 | " if done:\n", 588 | " print(\"Finished with reward = \",total_reward)\n", 589 | " break\n", 590 | " \n", 591 | " #discard unrealized part of the tree [because not every child matters :(]\n", 592 | " for child in root.children:\n", 593 | " if child != best_child:\n", 594 | " child.safe_delete()\n", 595 | "\n", 596 | " #declare 
best child a new root\n", 597 | " root = Root.from_node(best_child)\n", 598 | " \n", 599 | " # assert not root.is_leaf(), \"We ran out of tree! Need more planning! Try growing tree right inside the loop.\"\n", 600 | " \n", 601 | " #you may want to expand tree here\n", 602 | " #\n", 603 | " if root.is_leaf():\n", 604 | " plan_mcts(root,n_iters=10)" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "### Submit to Coursera" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 38, 617 | "metadata": {}, 618 | "outputs": [ 619 | { 620 | "name": "stdout", 621 | "output_type": "stream", 622 | "text": [ 623 | "Submitted to Coursera platform. See results on assignment page!\n" 624 | ] 625 | } 626 | ], 627 | "source": [ 628 | "import os\n", 629 | "from submit import submit_mcts\n", 630 | "submit_mcts(total_reward, os.environ[\"COURSERA_EMAIL\"], os.environ[\"COURSERA_TOKEN\"]) #credentials are read from the environment, never hard-coded in the notebook" 631 | ] 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "metadata": { 636 | "collapsed": true 637 | }, 638 | "source": [ 639 | "## More stuff\n", 640 | "\n", 641 | "There are a few things you might want to try if you want to dig deeper:\n", 642 | "\n", 643 | "### Node selection and expansion\n", 644 | "\n", 645 | "\"Analyze this\" assignment\n", 646 | "\n", 647 | "UCB-1 is a weak bound as it relies on a very general bound (Hoeffding Inequality, to be exact). \n", 648 | "* Try playing with alpha (the `scale` argument of `ucb_score`). The theoretically optimal alpha for CartPole is 200 (max reward). \n", 649 | "* Try using a different exploration strategy (Bayesian UCB, for example)\n", 650 | "* Expand not all but several random actions per `expand` call. See __the notes below__ for details.\n", 651 | "\n", 652 | "The goal is to find out what gives the optimal performance for `CartPole-v0` for different time budgets (i.e. 
different `n_iters` in `plan_mcts`).\n", 653 | "\n", 654 | "Evaluate your results on `Acrobot-v1` - do the results change and if so, how can you explain it?\n", 655 | "\n", 656 | "\n", 657 | "### Atari-RAM\n", 658 | "\n", 659 | "\"Build this\" assignment\n", 660 | "\n", 661 | "Apply MCTS to play atari games. In particular, let's start with ```gym.make(\"MsPacman-ramDeterministic-v0\")```.\n", 662 | "\n", 663 | "This requires two things:\n", 664 | "* Slightly modify the `WithSnapshots` wrapper to work with atari.\n", 665 | "\n", 666 | " * Atari has a special interface for snapshots:\n", 667 | " ``` \n", 668 | " snapshot = self.env.ale.cloneState()\n", 669 | " ...\n", 670 | " self.env.ale.restoreState(snapshot)\n", 671 | " ```\n", 672 | " * Try it on the env above to make sure it does what you told it to.\n", 673 | " \n", 674 | "* Run MCTS on the game above. \n", 675 | " * Start with a small tree size to speed up computations\n", 676 | " * You will probably want to rollout for 10-100 steps (t_max) for starters\n", 677 | " * Consider using discounted rewards (see __notes at the end__)\n", 678 | " * Try a better rollout policy\n", 679 | " \n", 680 | " \n", 681 | "### Integrate learning into planning\n", 682 | "\n", 683 | "Planning on each iteration is a costly thing to do. You can speed things up drastically if you train a classifier to predict which action will turn out to be best according to MCTS.\n", 684 | "\n", 685 | "To do so, just record which action the MCTS agent took on each step and fit something to [state, mcts_optimal_action]\n", 686 | "* You can also use optimal actions from discarded states to get more (dirty) samples. 
Just don't forget to fine-tune without them.\n", 687 | "* It's also worth a try to use P(best_action|state) from your model to select the best nodes in addition to UCB\n", 688 | "* If your model is lightweight enough, try using it as a rollout policy.\n", 689 | "\n", 690 | "While CartPole is glorious enough, try expanding this to ```gym.make(\"MsPacmanDeterministic-v0\")```\n", 691 | "* See the previous section on how to wrap atari\n", 692 | "\n", 693 | "* Also consider what [AlphaGo Zero](https://deepmind.com/blog/alphago-zero-learning-scratch/) did in this area.\n", 694 | "\n", 695 | "### Integrate planning into learning \n", 696 | "_(this will likely take a long time; better to consider this a side project for when all other deadlines are met)_\n", 697 | "\n", 698 | "Incorporate planning into the agent architecture. \n", 699 | "\n", 700 | "The goal is to implement [Value Iteration Networks](https://arxiv.org/abs/1602.02867)\n", 701 | "\n", 702 | "For starters, remember [week7 assignment](https://github.com/yandexdataschool/Practical_RL/blob/master/week7/7.2_seminar_kung_fu.ipynb)? If not, use [this](http://bit.ly/2oZ34Ap) instead.\n", 703 | "\n", 704 | "You will need to switch it into a maze-like game, consider MsPacman or the games from week7 [Bonus: Neural Maps from here](https://github.com/yandexdataschool/Practical_RL/blob/master/week7/7.3_homework.ipynb).\n", 705 | "\n", 706 | "You will need to implement a special layer that performs a value-iteration-like update to a recurrent memory. This can be implemented the same way you did attention from week7 or week8." 707 | ] 708 | }, 709 | { 710 | "cell_type": "markdown", 711 | "metadata": {}, 712 | "source": [ 713 | "## Notes\n", 714 | "\n", 715 | "\n", 716 | "#### Assumptions\n", 717 | "\n", 718 | "The full list of assumptions is\n", 719 | "* __Finite actions__ - we enumerate all actions in `expand`\n", 720 | "* __Episodic (finite) MDP__ - while technically it works for an infinite MDP, we roll out for $10^4$ steps. 
If your MDP is knowingly infinite, please adjust `t_max` to something more reasonable.\n", 721 | "* __No discounted rewards__ - we assume $\\gamma=1$. If that isn't the case, you only need to change two lines in `rollout` and use `my_value = self.immediate_reward + gamma*child_value` for `propagate`\n", 722 | "* __pickleable env__ - won't work if e.g. your env is connected to a web-browser surfing the internet. For custom envs, you may need to modify get_snapshot/load_snapshot from `WithSnapshots`.\n", 723 | "\n", 724 | "#### On `select_best_leaf` and `expand` functions\n", 725 | "\n", 726 | "This MCTS implementation only selects leaf nodes for expansion.\n", 727 | "This doesn't break things because `expand` adds all possible actions. Hence, all non-leaf nodes are by design fully expanded and shouldn't be selected.\n", 728 | "\n", 729 | "If you want to only add a few random actions on each expand, you will also have to modify `select_best_leaf` to consider returning non-leaves.\n", 730 | "\n", 731 | "#### Rollout policy\n", 732 | "\n", 733 | "We use a simple uniform policy for rollouts. This introduces a negative bias to good situations that can be messed up completely by a random bad action. As a simple example, if you tend to rollout with uniform policy, you'd better not use sharp knives or walk near cliffs.\n", 734 | "\n", 735 | "You can improve that by integrating a reinforcement _learning_ algorithm with a computationally light agent. 
You can even train this agent on optimal policy found by the tree search.\n", 736 | "\n", 737 | "#### Contributions\n", 738 | "* Reusing some code from 5vision [solution for deephack.RL](https://github.com/5vision/uct_atari), code by Mikhail Pavlov\n", 739 | "* Using some code from [this gist](https://gist.github.com/blole/dfebbec182e6b72ec16b66cc7e331110)" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": null, 745 | "metadata": { 746 | "collapsed": true 747 | }, 748 | "outputs": [], 749 | "source": [] 750 | } 751 | ], 752 | "metadata": { 753 | "kernelspec": { 754 | "display_name": "Python 3", 755 | "language": "python", 756 | "name": "python3" 757 | }, 758 | "language_info": { 759 | "codemirror_mode": { 760 | "name": "ipython", 761 | "version": 3 762 | }, 763 | "file_extension": ".py", 764 | "mimetype": "text/x-python", 765 | "name": "python", 766 | "nbconvert_exporter": "python", 767 | "pygments_lexer": "ipython3", 768 | "version": "3.6.2" 769 | } 770 | }, 771 | "nbformat": 4, 772 | "nbformat_minor": 1 773 | } 774 | --------------------------------------------------------------------------------