├── requirements.txt ├── .DS_Store ├── deeprl_hw1 ├── __init__.py ├── rl1.pyc ├── .DS_Store ├── __init__.pyc ├── lake_envs.pyc ├── queue_envs.pyc ├── rlvaliterchngd.pyc ├── driver3.py ├── lake_envs.py ├── rl.py ├── queue_envs.py ├── rl1.py └── rlvaliterchngd.py ├── results ├── .DS_Store ├── 2ca4x4.png ├── results.docx ├── part a4x4 │ ├── 2i.png │ ├── 2c4x4.png │ ├── 2e4x4.png │ ├── 1gvalue.csv │ └── 1gpolicy.csv ├── part a4x4 with max │ ├── 1c.png │ ├── 1e.png │ ├── 1gpolicy.csv │ └── 1gvalue.csv ├── part a8x8 with max │ ├── .DS_Store │ ├── 2c8x8.png │ ├── 2e8x8.png │ ├── 1gpolicy.csv │ └── 1gvalue.csv ├── Deterministic-4x4-neg-reward-FrozenLake-v0.png ├── Deterministic-4x4-neg-reward-FrozenLake-v0gamma0.16.png ├── Deterministic-4x4-neg-reward-FrozenLake-v0_2cvaluegamma0.16.csv ├── Stochastic-4x4-FrozenLake-v0_2bvalue.csv └── Deterministic-4x4-neg-reward-FrozenLake-v0_2cvalue.csv ├── Stochastic-4x4-FrozenLake-v0.png ├── .ipynb_checkpoints └── 21-checkpoint.ipynb ├── Deterministic-4x4-neg-reward-FrozenLake-v0.png ├── setup.py ├── .idea ├── modules.xml ├── misc.xml ├── deeprl_hw1_src.iml ├── inspectionProfiles │ └── Project_Default.xml └── workspace.xml ├── README.md ├── example.py ├── DeterministicFrozenLake.py ├── StochasticFrozenLake.py ├── DeterministicFrozenNegReward.py └── 21.ipynb /requirements.txt: -------------------------------------------------------------------------------- 1 | future 2 | gym 3 | numpy 4 | six 5 | -e . 6 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/.DS_Store -------------------------------------------------------------------------------- /deeprl_hw1/__init__.py: -------------------------------------------------------------------------------- 1 | import deeprl_hw1.lake_envs 2 | import deeprl_hw1.queue_envs 3 | -------------------------------------------------------------------------------- /results/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/.DS_Store -------------------------------------------------------------------------------- /deeprl_hw1/rl1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/deeprl_hw1/rl1.pyc -------------------------------------------------------------------------------- /results/2ca4x4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/2ca4x4.png -------------------------------------------------------------------------------- /deeprl_hw1/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/deeprl_hw1/.DS_Store -------------------------------------------------------------------------------- /results/results.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/results.docx -------------------------------------------------------------------------------- /deeprl_hw1/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/deeprl_hw1/__init__.pyc 
-------------------------------------------------------------------------------- /deeprl_hw1/lake_envs.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/deeprl_hw1/lake_envs.pyc -------------------------------------------------------------------------------- /results/part a4x4/2i.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/part a4x4/2i.png -------------------------------------------------------------------------------- /deeprl_hw1/queue_envs.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/deeprl_hw1/queue_envs.pyc -------------------------------------------------------------------------------- /results/part a4x4/2c4x4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/part a4x4/2c4x4.png -------------------------------------------------------------------------------- /results/part a4x4/2e4x4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/part a4x4/2e4x4.png -------------------------------------------------------------------------------- /deeprl_hw1/rlvaliterchngd.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/deeprl_hw1/rlvaliterchngd.pyc -------------------------------------------------------------------------------- /Stochastic-4x4-FrozenLake-v0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/Stochastic-4x4-FrozenLake-v0.png -------------------------------------------------------------------------------- /results/part a4x4 with max/1c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/part a4x4 with max/1c.png -------------------------------------------------------------------------------- /results/part a4x4 with max/1e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/part a4x4 with max/1e.png -------------------------------------------------------------------------------- /results/part a8x8 with max/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/part a8x8 with max/.DS_Store -------------------------------------------------------------------------------- /results/part a8x8 with max/2c8x8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/part a8x8 with max/2c8x8.png -------------------------------------------------------------------------------- /results/part a8x8 with max/2e8x8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/part a8x8 with max/2e8x8.png -------------------------------------------------------------------------------- /.ipynb_checkpoints/21-checkpoint.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /results/part a4x4/1gvalue.csv: -------------------------------------------------------------------------------- 1 | 0.592,0.657,0.730,0.657 2 | 0.657,0.001,0.811,0.002 3 | 0.730,0.811,0.901,0.002 4 | 0.007,0.901,1.001,0.001 -------------------------------------------------------------------------------- /Deterministic-4x4-neg-reward-FrozenLake-v0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/Deterministic-4x4-neg-reward-FrozenLake-v0.png -------------------------------------------------------------------------------- /results/part a4x4/1gpolicy.csv: -------------------------------------------------------------------------------- 1 | 0.5916,0.6572,0.7301,0.6571 2 | 0.6572,0.0005,0.8111,0.0023 3 | 0.7301,0.8111,0.9011,0.0021 4 | 0.0074,0.9011,1.0011,0.0011 -------------------------------------------------------------------------------- /results/Deterministic-4x4-neg-reward-FrozenLake-v0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/Deterministic-4x4-neg-reward-FrozenLake-v0.png -------------------------------------------------------------------------------- /results/Deterministic-4x4-neg-reward-FrozenLake-v0gamma0.16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aaksham/frozenlake/HEAD/results/Deterministic-4x4-neg-reward-FrozenLake-v0gamma0.16.png -------------------------------------------------------------------------------- /results/Deterministic-4x4-neg-reward-FrozenLake-v0_2cvaluegamma0.16.csv: -------------------------------------------------------------------------------- 1 | -0.999935,0.000065,-0.999961,0.000039 2 | 0.000065,0.000065,0.000039,0.000039 3 | 0.000046,0.000010,0.000101,0.000101 4 | 0.000046,0.000007,1.190390,1.190390 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | setup( 6 | name='DeepRL Homework 1', 7 | version='1.0', 8 | description='Library for 10-703 Homework 1', 9 | packages=['deeprl_hw1']) 10 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /results/part a4x4 with max/1gpolicy.csv: -------------------------------------------------------------------------------- 1 | 5.923973887191247290e-01,6.580073887191245641e-01,7.309073887191246399e-01,6.578166498472122203e-01 2 | 6.580073887191245641e-01,5.799280908488830578e-03,8.119073887191247119e-01,7.018522510759056705e-03 3 | 7.309073887191246399e-01,8.119073887191247119e-01,9.019073887191245698e-01,4.045348128936829789e-03 4 | 
8.663607960043642059e-03,9.019073887191245698e-01,1.001907388719124548e+00,1.907388719124641875e-03 5 | -------------------------------------------------------------------------------- /results/part a4x4 with max/1gvalue.csv: -------------------------------------------------------------------------------- 1 | 5.940389180249019407e-01,6.596489180249021089e-01,7.325489180249019627e-01,6.592940262224117332e-01 2 | 6.596489180249021089e-01,5.975301847172851538e-03,8.135489180249019237e-01,6.305401447811602444e-03 3 | 7.325489180249019627e-01,8.135489180249019237e-01,9.035489180249020036e-01,8.526760647121702126e-04 4 | 8.310233909333586064e-03,9.035489180249020036e-01,1.003548918024901981e+00,3.548918024901988755e-03 5 | -------------------------------------------------------------------------------- /deeprl_hw1/driver3.py: -------------------------------------------------------------------------------- 1 | import deeprl_hw1.queue_envs as qenv 2 | import numpy 3 | P1 = 0.1 4 | P2 = 0.9 5 | P3 = 0.1 6 | 7 | env=qenv.QueueEnv(P1,P2,P3) 8 | #ps=env.query_model((1,0,0,0),1) 9 | #print ps 10 | ps=env.query_model((1,5,3,4),3) 11 | print ps 12 | numpy.random.seed(0) 13 | env.reset() 14 | env.render() 15 | env._step(1) 16 | env.render() 17 | env._step(3) 18 | env.render() 19 | # 20 | # ps=env.query_model((1,5,5,5),3) 21 | # print ps 22 | -------------------------------------------------------------------------------- /results/Stochastic-4x4-FrozenLake-v0_2bvalue.csv: -------------------------------------------------------------------------------- 1 | 7.210080319262232584e-02,6.500778087490913237e-02,7.867964029828880546e-02,5.967327973610045411e-02 2 | 9.499989900152405742e-02,8.464270518010267794e-03,1.174987390033538359e-01,2.065368390832942116e-03 3 | 1.488080916636685680e-01,2.513906886597030987e-01,3.040243032811825175e-01,1.272454070772601007e-03 4 | 6.766787927394104021e-03,3.841673769823148454e-01,6.439724196755912677e-01,6.094194795894227086e-03 5 | -------------------------------------------------------------------------------- /results/Deterministic-4x4-neg-reward-FrozenLake-v0_2cvalue.csv: -------------------------------------------------------------------------------- 1 | -9.999608187719284391e-01,3.918122807154976087e-05,-9.999774521696255247e-01,2.254783037451089448e-05 2 | 3.918122807154976087e-05,3.918122807154976087e-05,2.254783037451089448e-05,2.254783037451089448e-05 3 | 9.128853355511469254e-05,6.268996491447961536e-06,5.862065414633444213e-05,5.862065414633444213e-05 4 | 9.128853355511469254e-05,1.460616536881835047e-05,1.190425913320509954e+00,1.190425913320509954e+00 5 | -------------------------------------------------------------------------------- /.idea/deeprl_hw1_src.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /results/part a8x8 with max/1gpolicy.csv: -------------------------------------------------------------------------------- 1 | 2.579595231504817621e-01,2.862024767985817952e-01,3.175835364075818013e-01,3.524513804175817833e-01,3.911934293175818311e-01,4.342401503175817856e-01,4.820698403175819879e-01,5.352139403175819599e-01 2 | 
2.862024767985817952e-01,3.175835364075818013e-01,3.524513804175817833e-01,3.911934293175818311e-01,4.342401503175817856e-01,4.820698403175819879e-01,5.352139403175819599e-01,5.942629403175818670e-01 3 | 3.175835364075818013e-01,3.524513804175817833e-01,3.911934293175818311e-01,3.767824654025735392e-03,4.820698403175819879e-01,5.352139403175819599e-01,5.942629403175818670e-01,6.598729403175817021e-01 4 | 3.524513804175817833e-01,3.911934293175818311e-01,4.342401503175817856e-01,4.820698403175819879e-01,5.352139403175819599e-01,3.942658874654073053e-03,6.598729403175817021e-01,7.327729403175817779e-01 5 | 3.172062423758236216e-01,3.520740863858236591e-01,3.908161352858235960e-01,9.125019350097088961e-04,5.942629403175818670e-01,6.598729403175817021e-01,7.327729403175817779e-01,8.137729403175818499e-01 6 | 2.854856181382412483e-01,8.370799156727023668e-03,6.671803637201679030e-03,5.942629403175818670e-01,6.598729403175817021e-01,7.327729403175817779e-01,7.436799765772327507e-03,9.037729403175817078e-01 7 | 3.168666777472413099e-01,4.493092146848743848e-03,4.816925462858237528e-01,5.348366462858237247e-01,7.133581024200938078e-03,8.137729403175818499e-01,2.763054665441602568e-04,1.003772940317581686e+00 8 | 3.517345217572412364e-01,3.904765706572413952e-01,4.335232916572414053e-01,5.762616389577770136e-03,8.137729403175818499e-01,9.037729403175817078e-01,1.003772940317581686e+00,3.772940317581741088e-03 9 | -------------------------------------------------------------------------------- /results/part a8x8 with max/1gvalue.csv: -------------------------------------------------------------------------------- 1 | 2.542550444410008881e-01,2.824979980891009212e-01,3.138790576981008718e-01,3.487469017081009648e-01,3.874889506081009016e-01,4.305356716081008006e-01,4.783653616081008364e-01,5.315094616081008638e-01 2 | 2.824979980891009212e-01,3.138790576981008718e-01,3.487469017081009648e-01,3.874889506081009016e-01,4.305356716081008006e-01,4.783653616081008364e-01,5.315094616081008638e-01,5.905584616081007709e-01 3 | 3.138790576981008718e-01,3.487469017081009648e-01,3.874889506081009016e-01,7.373080087904547095e-03,4.783653616081008364e-01,5.315094616081008638e-01,5.905584616081007709e-01,6.561684616081009391e-01 4 | 3.487469017081009648e-01,3.874889506081009016e-01,4.305356716081008006e-01,4.783653616081008364e-01,5.315094616081008638e-01,1.307747491744603533e-03,6.561684616081009391e-01,7.290684616081009040e-01 5 | 3.138722115372908905e-01,3.487400555472908170e-01,3.874821044472907539e-01,1.526370220697159533e-03,5.905584616081007709e-01,6.561684616081009391e-01,7.290684616081009040e-01,8.100684616081009759e-01 6 | 2.824849903835617848e-01,4.610209748014226609e-03,1.384175249950048581e-03,5.905584616081007709e-01,6.561684616081009391e-01,7.290684616081009040e-01,1.963379439910021348e-04,9.000684616081008338e-01 7 | 3.138660499925617353e-01,3.178091108843970371e-03,4.783585154472907996e-01,5.315026154472907161e-01,8.674123898821699957e-03,8.100684616081009759e-01,5.057248319540319850e-03,1.000068461608100812e+00 8 | 3.487338940025616618e-01,3.874759429025617097e-01,4.305226639025617197e-01,6.594137748716060420e-03,8.100684616081009759e-01,9.000684616081008338e-01,1.000068461608100812e+00,6.846160810080188482e-05 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Reinforcement Learning 2 | 3 | ## OpenAI Gym Environments 4 | ### Creating the environments 5 | 6 | To 
create the environment use the following code snippet: 7 | 8 | ``` 9 | import gym 10 | import deeprl_hw1.envs 11 | 12 | env = gym.make('Deterministic-4x4-FrozenLake-v0') 13 | ``` 14 | 15 | ### Actions 16 | 17 | There are four actions: LEFT, UP, DOWN, RIGHT represented as 18 | integers. The `deep_rl_hw1.envs` contains variables to reference 19 | these. For example: 20 | 21 | ``` 22 | print(deeprl_hw1.envs.LEFT) 23 | ``` 24 | 25 | will print out the number 0. 26 | 27 | ### Environment Attributes 28 | 29 | This class contains the following important attributes: 30 | 31 | - `nS` :: number of states 32 | - `nA` :: number of actions 33 | - `P` :: transitions, rewards, terminals 34 | 35 | The `P` attribute will be the most important for your implementation 36 | of value iteration and policy iteration. This attribute contains the 37 | model for the particular map instance. It is a dictionary of 38 | dictionary of lists with the following form: 39 | 40 | ``` 41 | P[s][a] = [(prob, nextstate, reward, is_terminal), ...] 42 | ``` 43 | 44 | For example, to get the probability of taking action LEFT in state 0 45 | you would use the following code: 46 | 47 | ``` 48 | env.P[0][deeprl_hw1.envs.LEFT] 49 | ``` 50 | 51 | This would return the list: `[(1.0, 0, 0.0, False)]` for the 52 | `Deterministic-4x4-FrozenLake-v0` domain. There is one tuple in the 53 | list, so there is only one possible next state. The next state will be 54 | state 0, according to the second number in the tuple. This will be the 55 | next state 100\% of the time according to the first number in the 56 | tuple. The reward function for this state action pair `R(0,LEFT) = 0` 57 | according to the third number. The final tuple value says that the 58 | next state is not terminal. 59 | 60 | ## 61 | ### Running a random policy 62 | 63 | example.py has an example of how to run a random policy on the domain. 64 | 65 | #Value Iteration 66 | The optimal policies for the different environments is in the .py files. 67 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import (absolute_import, division, print_function, 5 | unicode_literals) 6 | from builtins import input 7 | 8 | import deeprl_hw1.lake_envs as lake_env 9 | import gym 10 | import time 11 | 12 | 13 | def run_random_policy(env): 14 | """Run a random policy for the given environment. 15 | 16 | Logs the total reward and the number of steps until the terminal 17 | state was reached. 18 | 19 | Parameters 20 | ---------- 21 | env: gym.envs.Environment 22 | Instance of an OpenAI gym. 23 | 24 | Returns 25 | ------- 26 | (float, int) 27 | First number is the total undiscounted reward received. The 28 | second number is the total number of actions taken before the 29 | episode finished. 30 | """ 31 | initial_state = env.reset() 32 | env.render() 33 | time.sleep(1) # just pauses so you can see the output 34 | 35 | total_reward = 0 36 | num_steps = 0 37 | while True: 38 | nextstate, reward, is_terminal, debug_info = env.step( 39 | env.action_space.sample()) 40 | env.render() 41 | 42 | total_reward += reward 43 | num_steps += 1 44 | 45 | if is_terminal: 46 | break 47 | 48 | time.sleep(1) 49 | 50 | return total_reward, num_steps 51 | 52 | 53 | def print_env_info(env): 54 | print('Environment has %d states and %d actions.' 
% (env.nS, env.nA)) 55 | 56 | 57 | def print_model_info(env, state, action): 58 | transition_table_row = env.P[state][action] 59 | print( 60 | ('According to transition function, ' 61 | 'taking action %s(%d) in state %d leads to' 62 | ' %d possible outcomes') % (lake_env.action_names[action], 63 | action, state, len(transition_table_row))) 64 | for prob, nextstate, reward, is_terminal in transition_table_row: 65 | state_type = 'terminal' if is_terminal else 'non-terminal' 66 | print( 67 | '\tTransitioning to %s state %d with probability %f and reward %f' 68 | % (state_type, nextstate, prob, reward)) 69 | 70 | 71 | def main(): 72 | # create the environment 73 | env = gym.make('FrozenLake-v0') 74 | # uncomment next line to try the deterministic version 75 | # env = gym.make('Deterministic-4x4-FrozenLake-v0') 76 | 77 | print_env_info(env) 78 | print_model_info(env, 0, lake_env.DOWN) 79 | print_model_info(env, 1, lake_env.DOWN) 80 | print_model_info(env, 14, lake_env.RIGHT) 81 | 82 | input('Hit enter to run a random policy...') 83 | 84 | total_reward, num_steps = run_random_policy(env) 85 | print('Agent received total reward of: %f' % total_reward) 86 | print('Agent took %d steps' % num_steps) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /DeterministicFrozenLake.py: -------------------------------------------------------------------------------- 1 | import deeprl_hw1.lake_envs as lake_env 2 | import gym 3 | import time 4 | import seaborn 5 | from tabulate import tabulate 6 | import matplotlib.pyplot as plt 7 | from deeprl_hw1.rl1 import * 8 | 9 | def run_policy(env,gamma,policy): 10 | initial_state = env.reset() 11 | env.render() 12 | time.sleep(1) # just pauses so you can see the output 13 | 14 | total_reward = 0 15 | num_steps = 0 16 | current_state=initial_state 17 | while True: 18 | nextstate, reward, is_terminal, debug_info = env.step(policy[current_state]) 19 | env.render() 20 | 21 | total_reward += math.pow(gamma,num_steps)*reward 22 | num_steps += 1 23 | 24 | if is_terminal: 25 | break 26 | 27 | current_state=nextstate 28 | time.sleep(1) 29 | 30 | return total_reward, num_steps 31 | 32 | grid=8 33 | envname='Deterministic-'+str(grid)+'x'+str(grid)+'-FrozenLake-v0' 34 | env = gym.make(envname) 35 | env.render() 36 | gamma=0.9 37 | print "Executing Policy Iteration" 38 | start_time=time.time() 39 | policy, value_func, policy_iters, val_iters= policy_iteration(env,gamma) 40 | print "Total time taken: "+str((time.time()-start_time)) 41 | print "Total Policy Improvement Steps: "+str(policy_iters) 42 | print "Total Policy Evaluation Steps: "+str(val_iters) 43 | print "Policy:" 44 | policy_str=print_policy(policy,lake_env.action_names) 45 | ps=[] 46 | for elem in policy_str: 47 | ps.append(elem[0]) 48 | reshaped_policy=np.reshape(ps,(grid,grid)) 49 | print tabulate(reshaped_policy,tablefmt='latex') 50 | f, ax = plt.subplots(figsize=(11, 9)) 51 | cmap = seaborn.diverging_palette(220, 10, as_cmap=True) 52 | reshaped=np.reshape(value_func,(grid,grid)) 53 | seaborn.heatmap(reshaped, cmap=cmap, vmax=1.1, 54 | square=True, xticklabels=grid+1, yticklabels=grid+1, 55 | linewidths=.5, cbar_kws={"shrink": .5}, ax=ax) 56 | plt.savefig('1c.png',bbox_inches='tight') 57 | np.savetxt('1gpolicy.csv',reshaped,delimiter=',') 58 | 59 | print "Executing Value Iteration" 60 | start_time=time.time() 61 | value_function,value_iters=value_iteration(env,gamma) 62 | print "Total time taken: "+str((time.time()-start_time)) 63 
| print "Total Value Iteration Steps: "+str(value_iters) 64 | print "Policy:" 65 | policy=value_function_to_policy(env,gamma,value_function) 66 | policy_str=print_policy(policy,lake_env.action_names) 67 | ps=[] 68 | for elem in policy_str: 69 | ps.append(elem[0]) 70 | reshaped_policy=np.reshape(ps,(grid,grid)) 71 | print tabulate(reshaped_policy,tablefmt='latex') 72 | f, ax = plt.subplots(figsize=(11, 9)) 73 | cmap = seaborn.diverging_palette(220, 10, as_cmap=True) 74 | reshaped=np.reshape(value_function,(grid,grid)) 75 | seaborn.heatmap(reshaped, cmap=cmap, vmax=1.1, 76 | square=True, xticklabels=grid+1, yticklabels=grid+1, 77 | linewidths=.5, cbar_kws={"shrink": .5}, ax=ax) 78 | plt.savefig('1e.png',bbox_inches='tight') 79 | np.savetxt('1gvalue.csv',reshaped,delimiter=',') 80 | 81 | cum_reward,nsteps=run_policy(env,gamma,policy) 82 | print "Cumulative Reward: "+str(cum_reward) 83 | print "No. of steps: "+str(nsteps) 84 | 85 | -------------------------------------------------------------------------------- /deeprl_hw1/lake_envs.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """Defines some frozen lake maps.""" 3 | 4 | from __future__ import (absolute_import, division, print_function, 5 | unicode_literals) 6 | 7 | from gym.envs.toy_text.frozen_lake import LEFT, RIGHT, DOWN, UP 8 | from gym.envs.toy_text import frozen_lake, discrete 9 | 10 | from gym.envs.registration import register 11 | 12 | action_names = {LEFT: 'LEFT', RIGHT: 'RIGHT', DOWN: 'DOWN', UP: 'UP'} 13 | 14 | register( 15 | id='Deterministic-4x4-FrozenLake-v0', 16 | entry_point='gym.envs.toy_text.frozen_lake:FrozenLakeEnv', 17 | kwargs={'map_name': '4x4', 18 | 'is_slippery': False}) 19 | 20 | register( 21 | id='Deterministic-8x8-FrozenLake-v0', 22 | entry_point='gym.envs.toy_text.frozen_lake:FrozenLakeEnv', 23 | kwargs={'map_name': '8x8', 24 | 'is_slippery': False}) 25 | 26 | register( 27 | id='Stochastic-4x4-FrozenLake-v0', 28 | entry_point='gym.envs.toy_text.frozen_lake:FrozenLakeEnv', 29 | kwargs={'map_name': '4x4', 30 | 'is_slippery': True}) 31 | 32 | register( 33 | id='Stochastic-8x8-FrozenLake-v0', 34 | entry_point='gym.envs.toy_text.frozen_lake:FrozenLakeEnv', 35 | kwargs={'map_name': '8x8', 36 | 'is_slippery': True}) 37 | 38 | 39 | class NegRewardFrozenLake(frozen_lake.FrozenLakeEnv): 40 | def __init__(self, **kwargs): 41 | super(NegRewardFrozenLake, self).__init__(**kwargs) 42 | 43 | # modify the rewards 44 | for state in range(self.nS): 45 | for action in range(self.nA): 46 | new_transitions = [] 47 | for (prob, nextstate, _, is_terminal) in self.P[state][action]: 48 | row = nextstate // self.ncol 49 | col = nextstate - row * self.ncol 50 | tile_type = self.desc[row, col] 51 | if tile_type == 'F' or tile_type == 'S': 52 | reward = -1 53 | elif tile_type == 'G': 54 | reward = 1 55 | else: 56 | reward = 0 57 | 58 | new_transitions.append( 59 | (prob, nextstate, reward, is_terminal)) 60 | self.P[state][action] = new_transitions 61 | 62 | 63 | register( 64 | id='Deterministic-4x4-neg-reward-FrozenLake-v0', 65 | entry_point='deeprl_hw1.lake_envs:NegRewardFrozenLake', 66 | kwargs={'map_name': '4x4', 67 | 'is_slippery': False}) 68 | 69 | register( 70 | id='Stochastic-4x4-neg-reward-FrozenLake-v0', 71 | entry_point='deeprl_hw1.lake_envs:NegRewardFrozenLake', 72 | kwargs={'map_name': '4x4', 73 | 'is_slippery': True}) 74 | 75 | register( 76 | id='Deterministic-8x8-neg-reward-FrozenLake-v0', 77 | entry_point='deeprl_hw1.lake_envs:NegRewardFrozenLake', 78 | 
kwargs={'map_name': '8x8', 79 | 'is_slippery': False}) 80 | 81 | register( 82 | id='Stochastic-8x8-neg-reward-FrozenLake-v0', 83 | entry_point='deeprl_hw1.lake_envs:NegRewardFrozenLake', 84 | kwargs={'map_name': '8x8', 85 | 'is_slippery': True}) 86 | -------------------------------------------------------------------------------- /StochasticFrozenLake.py: -------------------------------------------------------------------------------- 1 | import deeprl_hw1.lake_envs as lake_env 2 | import gym 3 | import time 4 | import seaborn 5 | from tabulate import tabulate 6 | import matplotlib.pyplot as plt 7 | from deeprl_hw1.rlvaliterchngd import * 8 | 9 | def run_policy(env,gamma,policy): 10 | initial_state = env.reset() 11 | #env.render() 12 | time.sleep(1) # just pauses so you can see the output 13 | 14 | total_reward = 0 15 | num_steps = 0 16 | current_state=initial_state 17 | while True: 18 | nextstate, reward, is_terminal, debug_info = env.step(policy[current_state]) 19 | #env.render() 20 | 21 | total_reward += math.pow(gamma,num_steps)*reward 22 | num_steps += 1 23 | 24 | if is_terminal: 25 | break 26 | 27 | current_state=nextstate 28 | time.sleep(1) 29 | 30 | return total_reward, num_steps 31 | 32 | grid=4 33 | envname='Stochastic-'+str(grid)+'x'+str(grid)+'-FrozenLake-v0' 34 | env = gym.make(envname) 35 | env.render() 36 | gamma=0.9 37 | 38 | # print "Executing Policy Iteration" 39 | # start_time=time.time() 40 | # policy, value_func, policy_iters, val_iters= policy_iteration(env,gamma) 41 | # print "Total time taken: "+str((time.time()-start_time)) 42 | # print "Total Policy Improvement Steps: "+str(policy_iters) 43 | # print "Total Policy Evaluation Steps: "+str(val_iters) 44 | # print "Policy:" 45 | # policy_str=print_policy(policy,lake_env.action_names) 46 | # ps=[] 47 | # for elem in policy_str: 48 | # ps.append(elem[0]) 49 | # reshaped_policy=np.reshape(ps,(grid,grid)) 50 | # print tabulate(reshaped_policy,tablefmt='latex') 51 | # f, ax = plt.subplots(figsize=(11, 9)) 52 | # cmap = seaborn.diverging_palette(220, 10, as_cmap=True) 53 | # reshaped=np.reshape(value_func,(grid,grid)) 54 | # seaborn.heatmap(reshaped, cmap=cmap, vmax=1.1, 55 | # square=True, xticklabels=grid+1, yticklabels=grid+1, 56 | # linewidths=.5, cbar_kws={"shrink": .5}, ax=ax) 57 | # plt.savefig('1c.png',bbox_inches='tight') 58 | # np.savetxt('1gpolicy.csv',reshaped,delimiter=',') 59 | 60 | print "Executing Value Iteration" 61 | start_time=time.time() 62 | value_function,value_iters=value_iteration(env,gamma) 63 | print "Total time taken: "+str((time.time()-start_time)) 64 | print "Total Value Iteration Steps: "+str(value_iters) 65 | print "Policy:" 66 | policy=value_function_to_policy(env,gamma,value_function) 67 | policy_str=print_policy(policy,lake_env.action_names) 68 | ps=[] 69 | for elem in policy_str: 70 | ps.append(elem[0]) 71 | reshaped_policy=np.reshape(ps,(grid,grid)) 72 | print tabulate(reshaped_policy,tablefmt='latex') 73 | f, ax = plt.subplots(figsize=(11, 9)) 74 | cmap = seaborn.diverging_palette(220, 10, as_cmap=True) 75 | reshaped=np.reshape(value_function,(grid,grid)) 76 | seaborn.heatmap(reshaped, cmap=cmap, vmax=1.1, 77 | square=True, xticklabels=grid+1, yticklabels=grid+1, 78 | linewidths=.5, cbar_kws={"shrink": .5}, ax=ax) 79 | plt.savefig(envname+'.png',bbox_inches='tight') 80 | np.savetxt(envname+'_2bvalue.csv',reshaped,delimiter=',') 81 | 82 | total_cum_reward=0 83 | maxn=5 84 | start_time=time.time() 85 | for n in range(maxn): 86 | cum_reward,nsteps=run_policy(env,gamma,policy) 87 | 
total_cum_reward+=cum_reward 88 | if n%1==0: print "Done "+str(n) 89 | print ("Time: "+str((time.time()-start_time)/60)) 90 | 91 | print "Average Cumulative Reward: "+str((total_cum_reward/maxn)) 92 | print "No. of steps: "+str(nsteps) 93 | 94 | -------------------------------------------------------------------------------- /DeterministicFrozenNegReward.py: -------------------------------------------------------------------------------- 1 | import deeprl_hw1.lake_envs as lake_env 2 | import gym 3 | import time 4 | import seaborn 5 | from tabulate import tabulate 6 | import matplotlib.pyplot as plt 7 | from deeprl_hw1.rlvaliterchngd import * 8 | 9 | def run_policy(env,gamma,policy): 10 | initial_state = env.reset() 11 | #env.render() 12 | time.sleep(1) # just pauses so you can see the output 13 | 14 | total_reward = 0 15 | num_steps = 0 16 | current_state=initial_state 17 | while True: 18 | nextstate, reward, is_terminal, debug_info = env.step(policy[current_state]) 19 | #env.render() 20 | 21 | total_reward += math.pow(gamma,num_steps)*reward 22 | num_steps += 1 23 | 24 | if is_terminal: 25 | break 26 | 27 | current_state=nextstate 28 | time.sleep(1) 29 | 30 | return total_reward, num_steps 31 | 32 | grid=4 33 | envname='Deterministic-4x4-neg-reward-FrozenLake-v0' 34 | env = gym.make(envname) 35 | env.render() 36 | gamma=0.16 37 | 38 | # print "Executing Policy Iteration" 39 | # start_time=time.time() 40 | # policy, value_func, policy_iters, val_iters= policy_iteration(env,gamma) 41 | # print "Total time taken: "+str((time.time()-start_time)) 42 | # print "Total Policy Improvement Steps: "+str(policy_iters) 43 | # print "Total Policy Evaluation Steps: "+str(val_iters) 44 | # print "Policy:" 45 | # policy_str=print_policy(policy,lake_env.action_names) 46 | # ps=[] 47 | # for elem in policy_str: 48 | # ps.append(elem[0]) 49 | # reshaped_policy=np.reshape(ps,(grid,grid)) 50 | # print tabulate(reshaped_policy,tablefmt='latex') 51 | # f, ax = plt.subplots(figsize=(11, 9)) 52 | # cmap = seaborn.diverging_palette(220, 10, as_cmap=True) 53 | # reshaped=np.reshape(value_func,(grid,grid)) 54 | # seaborn.heatmap(reshaped, cmap=cmap, vmax=1.1, 55 | # square=True, xticklabels=grid+1, yticklabels=grid+1, 56 | # linewidths=.5, cbar_kws={"shrink": .5}, ax=ax) 57 | # plt.savefig('1c.png',bbox_inches='tight') 58 | # np.savetxt('1gpolicy.csv',reshaped,delimiter=',') 59 | 60 | print "Executing Value Iteration" 61 | start_time=time.time() 62 | value_function,value_iters=value_iteration(env,gamma) 63 | print "Total time taken: "+str((time.time()-start_time)) 64 | print "Total Value Iteration Steps: "+str(value_iters) 65 | print "Policy:" 66 | policy=value_function_to_policy(env,gamma,value_function) 67 | policy_str=print_policy(policy,lake_env.action_names) 68 | ps=[] 69 | for elem in policy_str: 70 | ps.append(elem[0]) 71 | reshaped_policy=np.reshape(ps,(grid,grid)) 72 | print tabulate(reshaped_policy,tablefmt='latex') 73 | f, ax = plt.subplots(figsize=(11, 9)) 74 | cmap = seaborn.diverging_palette(220, 10, as_cmap=True) 75 | reshaped=np.reshape(value_function,(grid,grid)) 76 | seaborn.heatmap(reshaped, cmap=cmap, vmax=5, 77 | square=True, xticklabels=grid+1, yticklabels=grid+1, 78 | linewidths=.5, cbar_kws={"shrink": .5}, ax=ax) 79 | plt.savefig(envname+'.png',bbox_inches='tight') 80 | np.savetxt(envname+'_2cvalue.csv',reshaped,delimiter=',') 81 | 82 | # total_cum_reward=0 83 | # maxn=5 84 | # start_time=time.time() 85 | # for n in range(maxn): 86 | # cum_reward,nsteps=run_policy(env,gamma,policy) 87 | # 
total_cum_reward+=cum_reward 88 | # if n%1==0: print "Done "+str(n) 89 | # print ("Time: "+str((time.time()-start_time)/60)) 90 | # 91 | # print "Average Cumulative Reward: "+str((total_cum_reward/maxn)) 92 | # print "No. of steps: "+str(nsteps) 93 | 94 | -------------------------------------------------------------------------------- /deeprl_hw1/rl.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import division, absolute_import 3 | from __future__ import print_function, unicode_literals 4 | 5 | import numpy as np 6 | 7 | 8 | def evaluate_policy(env, gamma, policy, max_iterations=int(1e3), tol=1e-3): 9 | """Evaluate the value of a policy. 10 | 11 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 12 | book. 13 | 14 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 15 | 16 | Parameters 17 | ---------- 18 | env: gym.core.Environment 19 | The environment to compute value iteration for. Must have nS, 20 | nA, and P as attributes. 21 | gamma: float 22 | Discount factor, must be in range [0, 1) 23 | policy: np.array 24 | The policy to evaluate. Maps states to actions. 25 | max_iterations: int 26 | The maximum number of iterations to run before stopping. 27 | tol: float 28 | Determines when value function has converged. 29 | 30 | Returns 31 | ------- 32 | np.ndarray 33 | The value for the given policy 34 | """ 35 | return np.zeros(env.nS) 36 | 37 | 38 | def value_function_to_policy(env, gamma, value_function): 39 | """Output action numbers for each state in value_function. 40 | 41 | Parameters 42 | ---------- 43 | env: gym.core.Environment 44 | Environment to compute policy for. Must have nS, nA, and P as 45 | attributes. 46 | gamma: float 47 | Discount factor. Number in range [0, 1) 48 | value_function: np.ndarray 49 | Value of each state. 50 | 51 | Returns 52 | ------- 53 | np.ndarray 54 | An array of integers. Each integer is the optimal action to take 55 | in that state according to the environment dynamics and the 56 | given value function. 57 | """ 58 | return np.zeros(env.nS, dtype='int') 59 | 60 | 61 | def improve_policy(env, gamma, value_func, policy): 62 | """Given a policy and value function improve the policy. 63 | 64 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 65 | book. 66 | 67 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 68 | 69 | Parameters 70 | ---------- 71 | env: gym.core.Environment 72 | The environment to compute value iteration for. Must have nS, 73 | nA, and P as attributes. 74 | gamma: float 75 | Discount factor, must be in range [0, 1) 76 | value_func: np.ndarray 77 | Value function for the given policy. 78 | policy: dict or np.array 79 | The policy to improve. Maps states to actions. 80 | max_iterations: int 81 | The maximum number of iterations to run before stopping. 82 | tol: float 83 | Determines when value function has converged. 84 | 85 | Returns 86 | ------- 87 | bool, np.ndarray 88 | Returns true if policy changed. Also returns the new policy. 89 | """ 90 | return False, policy 91 | 92 | 93 | def policy_iteration(env, gamma, max_iterations=int(1e3), tol=1e-3): 94 | """Runs policy iteration. 95 | 96 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 97 | book. 98 | 99 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 100 | 101 | You should use the improve_policy and evaluate_policy methods to 102 | implement this method. 
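A minimal sketch of one possible loop, mirroring the reference implementation in deeprl_hw1/rl1.py (where evaluate_policy also returns the number of evaluation sweeps it ran, and the boolean returned by improve_policy is True once the policy is stable):

    policy = np.zeros(env.nS, dtype='int')
    value_func = np.zeros(env.nS)
    improvement_steps, evaluation_steps = 0, 0
    stable = False
    while not stable:
        # evaluate the current policy, then act greedily with respect to it
        value_func, n_eval = evaluate_policy(env, gamma, policy)
        evaluation_steps += n_eval
        stable, policy = improve_policy(env, gamma, value_func, policy)
        improvement_steps += 1
    return policy, value_func, improvement_steps, evaluation_steps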
103 | 104 | Parameters 105 | ---------- 106 | env: gym.core.Environment 107 | The environment to compute value iteration for. Must have nS, 108 | nA, and P as attributes. 109 | gamma: float 110 | Discount factor, must be in range [0, 1) 111 | max_iterations: int 112 | The maximum number of iterations to run before stopping. 113 | tol: float 114 | Determines when value function has converged. 115 | 116 | Returns 117 | ------- 118 | (np.ndarray, np.ndarray, int, int) 119 | Returns optimal policy, value function, number of policy 120 | improvement iterations, and number of value iterations. 121 | """ 122 | policy = np.zeros(env.nS, dtype='int') 123 | value_func = np.zeros(env.nS) 124 | 125 | return policy, value_func, 0, 0 126 | 127 | 128 | def value_iteration(env, gamma, max_iterations=int(1e3), tol=1e-3): 129 | """Runs value iteration for a given gamma and environment. 130 | 131 | See page 90 (pg 108 pdf) of the Sutton and Barto Second Edition 132 | book. 133 | 134 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 135 | 136 | Parameters 137 | ---------- 138 | env: gym.core.Environment 139 | The environment to compute value iteration for. Must have nS, 140 | nA, and P as attributes. 141 | gamma: float 142 | Discount factor, must be in range [0, 1) 143 | max_iterations: int 144 | The maximum number of iterations to run before stopping. 145 | tol: float 146 | Determines when value function has converged. 147 | 148 | Returns 149 | ------- 150 | np.ndarray, iteration 151 | The value function and the number of iterations it took to converge. 152 | """ 153 | return np.zeros(env.nS), 0 154 | 155 | 156 | def print_policy(policy, action_names): 157 | """Print the policy in human-readable format. 158 | 159 | Parameters 160 | ---------- 161 | policy: np.ndarray 162 | Array of state to action number mappings 163 | action_names: dict 164 | Mapping of action numbers to characters representing the action. 
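For example, the driver scripts call print_policy(policy, lake_env.action_names) with the action_names dictionary defined in deeprl_hw1/lake_envs.py, which turns the integer actions into LEFT/RIGHT/DOWN/UP labels.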
165 | """ 166 | str_policy = policy.astype('str') 167 | for action_num, action_name in action_names.items(): 168 | np.place(str_policy, policy == action_num, action_name) 169 | 170 | print(str_policy) 171 | -------------------------------------------------------------------------------- /21.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import gym\n", 12 | "import deeprl_hw1\n", 13 | "from example import *" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 37, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [ 23 | { 24 | "name": "stderr", 25 | "output_type": "stream", 26 | "text": [ 27 | "INFO:gym.envs.registration:Making new env: Deterministic-4x4-neg-reward-FrozenLake-v0\n", 28 | "[2017-02-15 00:08:57,815] Making new env: Deterministic-4x4-neg-reward-FrozenLake-v0\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "env=gym.make('Deterministic-4x4-neg-reward-FrozenLake-v0')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 30, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "0\n", 48 | "2\n", 49 | "3\n", 50 | "1\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "print(deeprl_hw1.lake_envs.LEFT)\n", 56 | "print(deeprl_hw1.lake_envs.RIGHT)\n", 57 | "print(deeprl_hw1.lake_envs.UP)\n", 58 | "print(deeprl_hw1.lake_envs.DOWN)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 38, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "{0: {0: [(1.0, 0, -1, False)], 1: [(1.0, 4, -1, False)], 2: [(1.0, 1, -1, False)], 3: [(1.0, 0, -1, False)]}, 1: {0: [(1.0, 0, -1, False)], 1: [(1.0, 5, 0, True)], 2: [(1.0, 2, -1, False)], 3: [(1.0, 1, -1, False)]}, 2: {0: [(1.0, 1, -1, False)], 1: [(1.0, 6, -1, False)], 2: [(1.0, 3, -1, False)], 3: [(1.0, 2, -1, False)]}, 3: {0: [(1.0, 2, -1, False)], 1: [(1.0, 7, 0, True)], 2: [(1.0, 3, -1, False)], 3: [(1.0, 3, -1, False)]}, 4: {0: [(1.0, 4, -1, False)], 1: [(1.0, 8, -1, False)], 2: [(1.0, 5, 0, True)], 3: [(1.0, 0, -1, False)]}, 5: {0: [(1.0, 5, 0, True)], 1: [(1.0, 5, 0, True)], 2: [(1.0, 5, 0, True)], 3: [(1.0, 5, 0, True)]}, 6: {0: [(1.0, 5, 0, True)], 1: [(1.0, 10, -1, False)], 2: [(1.0, 7, 0, True)], 3: [(1.0, 2, -1, False)]}, 7: {0: [(1.0, 7, 0, True)], 1: [(1.0, 7, 0, True)], 2: [(1.0, 7, 0, True)], 3: [(1.0, 7, 0, True)]}, 8: {0: [(1.0, 8, -1, False)], 1: [(1.0, 12, 0, True)], 2: [(1.0, 9, -1, False)], 3: [(1.0, 4, -1, False)]}, 9: {0: [(1.0, 8, -1, False)], 1: [(1.0, 13, -1, False)], 2: [(1.0, 10, -1, False)], 3: [(1.0, 5, 0, True)]}, 10: {0: [(1.0, 9, -1, False)], 1: [(1.0, 14, -1, False)], 2: [(1.0, 11, 0, True)], 3: [(1.0, 6, -1, False)]}, 11: {0: [(1.0, 11, 0, True)], 1: [(1.0, 11, 0, True)], 2: [(1.0, 11, 0, True)], 3: [(1.0, 11, 0, True)]}, 12: {0: [(1.0, 12, 0, True)], 1: [(1.0, 12, 0, True)], 2: [(1.0, 12, 0, True)], 3: [(1.0, 12, 0, True)]}, 13: {0: [(1.0, 12, 0, True)], 1: [(1.0, 13, -1, False)], 2: [(1.0, 14, -1, False)], 3: [(1.0, 9, -1, False)]}, 14: {0: [(1.0, 13, -1, False)], 1: [(1.0, 14, -1, False)], 2: [(1.0, 15, 1, True)], 3: [(1.0, 10, -1, False)]}, 15: {0: [(1.0, 15, 1, True)], 1: [(1.0, 15, 1, True)], 2: [(1.0, 15, 1, True)], 3: [(1.0, 15, 1, True)]}}\n" 
73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "print env.P" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 16, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "Environment has 16 states and 4 actions.\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "print_env_info(env)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 17, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "According to transition function, taking action RIGHT(2) in state 0 leads to 1 possible outcomes\n", 111 | "\tTransitioning to non-terminal state 1 with probability 1.000000 and reward 0.000000\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "print_model_info(env,0,deeprl_hw1.lake_envs.RIGHT)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 18, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "\u001b[41mS\u001b[0mFFF\n", 131 | "FHFH\n", 132 | "FFFH\n", 133 | "HFFG\n", 134 | "\n" 135 | ] 136 | }, 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "" 141 | ] 142 | }, 143 | "execution_count": 18, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "env.render()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 24, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "4\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "print env.nA" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 23, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [ 178 | { 179 | "name": "stdout", 180 | "output_type": "stream", 181 | "text": [ 182 | "4\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "print env.action_space.n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": true 195 | }, 196 | "outputs": [], 197 | "source": [] 198 | } 199 | ], 200 | "metadata": { 201 | "kernelspec": { 202 | "display_name": "Python 2", 203 | "language": "python", 204 | "name": "python2" 205 | }, 206 | "language_info": { 207 | "codemirror_mode": { 208 | "name": "ipython", 209 | "version": 2 210 | }, 211 | "file_extension": ".py", 212 | "mimetype": "text/x-python", 213 | "name": "python", 214 | "nbconvert_exporter": "python", 215 | "pygments_lexer": "ipython2", 216 | "version": "2.7.12" 217 | } 218 | }, 219 | "nbformat": 4, 220 | "nbformat_minor": 0 221 | } 222 | -------------------------------------------------------------------------------- /deeprl_hw1/queue_envs.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """Define the Queue environment from problem 3 here.""" 3 | 4 | from __future__ import (absolute_import, division, print_function, 5 | unicode_literals) 6 | 7 | from gym import Env, spaces 8 | from gym.envs.registration import register 9 | import numpy 10 | import itertools 11 | 12 | class QueueEnv(Env): 13 | """Implement the Queue environment from problem 3. 14 | 15 | Parameters 16 | ---------- 17 | p1: float 18 | Value between [0, 1]. The probability of queue 1 receiving a new item. 
19 | p2: float 20 | Value between [0, 1]. The probability of queue 2 receiving a new item. 21 | p3: float 22 | Value between [0, 1]. The probability of queue 3 receiving a new item. 23 | 24 | Attributes 25 | ---------- 26 | nS: number of states 27 | nA: number of actions 28 | P: environment model 29 | """ 30 | metadata = {'render.modes': ['human']} 31 | 32 | SWITCH_TO_1 = 0 33 | SWITCH_TO_2 = 1 34 | SWITCH_TO_3 = 2 35 | SERVICE_QUEUE = 3 36 | 37 | 38 | 39 | 40 | def __init__(self, p1, p2, p3): 41 | self.action_space = spaces.Discrete(4) 42 | self.observation_space = spaces.MultiDiscrete( 43 | [(1, 3), (0, 5), (0, 5), (0, 5)]) 44 | self.nS = 0 45 | self.nA = 4 46 | self.P = dict() 47 | self.current_state=(1,0,0,0) 48 | self.p1=p1 49 | self.p2=p2 50 | self.p3=p3 51 | 52 | 53 | def _reset(self): 54 | """Reset the environment. 55 | 56 | The server should always start on Queue 1. 57 | 58 | Returns 59 | ------- 60 | (int, int, int, int) 61 | A tuple representing the current state with meanings 62 | (current queue, num items in 1, num items in 2, num items in 63 | 3). 64 | """ 65 | self.current_state=(1,0,0,0) 66 | return self.current_state 67 | 68 | def _step(self, action): 69 | """Execute the specified action. 70 | 71 | Parameters 72 | ---------- 73 | action: int 74 | A number in range [0, 3]. Represents the action. 75 | 76 | Returns 77 | ------- 78 | (state, reward, is_terminal, debug_info) 79 | State is the tuple in the same format as the reset 80 | method. Reward is a floating point number. is_terminal is a 81 | boolean representing if the new state is a terminal 82 | state. debug_info is a dictionary. You can fill debug_info 83 | with any additional information you deem useful. 84 | """ 85 | possible_next_states=self.query_model(self.current_state,action) 86 | probarray=[] 87 | for ps in possible_next_states: 88 | probarray.append(ps[0]) 89 | probs=numpy.asarray(probarray) 90 | randomarray=numpy.random.rand(len(possible_next_states),1) 91 | next_state_index=self.categorical_sample(probs,randomarray) 92 | pns=possible_next_states[next_state_index] 93 | next_state=(pns[1],pns[2],pns[3],dict()) 94 | self.current_state=next_state[0] 95 | return next_state 96 | 97 | 98 | 99 | def _render(self, mode='human', close=False): 100 | print ("Current Q: "+str(self.current_state[0])) 101 | print ("Items in Q1: "+str(self.current_state[1])) 102 | print ("Items in Q2: "+str(self.current_state[2])) 103 | print ("Items in Q3: "+str(self.current_state[3])) 104 | print ("\n") 105 | 106 | 107 | def _seed(self, seed=None): 108 | """Set the random seed. 109 | 110 | Parameters 111 | ---------- 112 | seed: int, None 113 | Random seed used by numpy.random and random. 114 | """ 115 | pass 116 | 117 | def query_model(self, state, action): 118 | """Return the possible transition outcomes for a state-action pair. 119 | 120 | This should be in the same format at the provided environments 121 | in section 2. 122 | 123 | Parameters 124 | ---------- 125 | state 126 | State used in query. Should be in the same format at 127 | the states returned by reset and step. 128 | action: int 129 | The action used in query. 130 | 131 | Returns 132 | ------- 133 | [(prob, nextstate, reward, is_terminal), ...] 
134 | List of possible outcomes 135 | """ 136 | lst=list(itertools.product([0,1],repeat=3)) 137 | reward=0 138 | newstate=list(state) 139 | if action==QueueEnv.SERVICE_QUEUE: 140 | currq=newstate[0] 141 | if newstate[currq]>0: 142 | newstate[currq]-=1 143 | reward=1 144 | elif action==QueueEnv.SWITCH_TO_1: 145 | newstate[0]=1 146 | elif action==QueueEnv.SWITCH_TO_2: 147 | newstate[0]=2 148 | elif action==QueueEnv.SWITCH_TO_3: 149 | newstate[0]=3 150 | blockq1=1 151 | blockq2=1 152 | blockq3=1 153 | if newstate[1]>=5: blockq1=0 154 | if newstate[2]>=5: blockq2=0 155 | if newstate[3]>=5: blockq3=0 156 | possible_states=[] 157 | for combination in lst: 158 | q1=combination[0] 159 | q2=combination[1] 160 | q3=combination[2] 161 | state_prob=0 162 | newpstate=newstate[:] 163 | if blockq1==0 or q1==0: state_prob+=(1-self.p1) 164 | else: 165 | state_prob+=self.p1 166 | newpstate[1]+=1 167 | if blockq2==0 or q2==0: state_prob=state_prob*(1-self.p2) 168 | else: 169 | state_prob=state_prob*self.p2 170 | newpstate[2]+=1 171 | if blockq3==0 or q3==0: state_prob=state_prob*(1-self.p3) 172 | else: 173 | state_prob=state_prob*self.p3 174 | newpstate[3]+=1 175 | found=False 176 | for psalready in possible_states: 177 | if tuple(newpstate) == psalready[1]: 178 | found=True 179 | break 180 | if not found: possible_states.append((state_prob,tuple(newpstate))) 181 | total_prob=0 182 | for ps in possible_states: 183 | total_prob+=ps[0] 184 | for i in range(len(possible_states)): 185 | unnormalized_state=possible_states[i] 186 | possible_states[i]=(float(unnormalized_state[0])/float(total_prob),unnormalized_state[1]) 187 | final_list=[] 188 | for ps in possible_states: 189 | final_list.append((ps[0],ps[1],reward,False)) 190 | return final_list 191 | 192 | def get_action_name(self, action): 193 | if action == QueueEnv.SERVICE_QUEUE: 194 | return 'SERVICE_QUEUE' 195 | elif action == QueueEnv.SWITCH_TO_1: 196 | return 'SWITCH_TO_1' 197 | elif action == QueueEnv.SWITCH_TO_2: 198 | return 'SWITCH_TO_2' 199 | elif action == QueueEnv.SWITCH_TO_3: 200 | return 'SWITCH_TO_3' 201 | return 'UNKNOWN' 202 | 203 | def categorical_sample(self, prob_n, np_random): 204 | """ 205 | Sample from categorical distribution 206 | Each row specifies class probabilities 207 | """ 208 | csprob_n = numpy.cumsum(prob_n) 209 | return (csprob_n > np_random).argmax() 210 | 211 | register( 212 | id='Queue-1-v0', 213 | entry_point='deeprl_hw1.queue_envs:QueueEnv', 214 | kwargs={'p1': .1, 215 | 'p2': .9, 216 | 'p3': .1}) 217 | 218 | register( 219 | id='Queue-2-v0', 220 | entry_point='deeprl_hw1.queue_envs:QueueEnv', 221 | kwargs={'p1': .1, 222 | 'p2': .1, 223 | 'p3': .1}) 224 | -------------------------------------------------------------------------------- /deeprl_hw1/rl1.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import division, absolute_import 3 | from __future__ import print_function, unicode_literals 4 | 5 | import numpy as np 6 | import math 7 | 8 | def evaluate_policy(env, gamma, policy, max_iterations=int(1e3), tol=1e-3): 9 | """Evaluate the value of a policy. 10 | 11 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 12 | book. 13 | 14 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 15 | 16 | Parameters 17 | ---------- 18 | env: gym.core.Environment 19 | The environment to compute value iteration for. Must have nS, 20 | nA, and P as attributes. 
21 | gamma: float 22 | Discount factor, must be in range [0, 1) 23 | policy: np.array 24 | The policy to evaluate. Maps states to actions. 25 | max_iterations: int 26 | The maximum number of iterations to run before stopping. 27 | tol: float 28 | Determines when value function has converged. 29 | 30 | Returns 31 | ------- 32 | np.ndarray 33 | The value for the given policy 34 | """ 35 | value_func_old = np.random.rand(env.nS) 36 | value_func_new = np.zeros(env.nS) 37 | for iteration in range(max_iterations): 38 | delta=0 39 | for s in range(env.nS): 40 | vs=0 41 | actions=[policy[s]] 42 | #if len(actions)==1: actions=[actions] 43 | for a in actions: 44 | for possible_next_state in env.P[s][a]: 45 | prob_action = possible_next_state[0] 46 | cur_reward=possible_next_state[2] 47 | future_reward=gamma*value_func_old[possible_next_state[1]] 48 | vs+=prob_action*(cur_reward+future_reward) 49 | #if env.P[s][a][3]:break 50 | diff=abs(value_func_old[s]-vs) 51 | delta=max(delta,diff) 52 | value_func_new[s]=vs 53 | #delta=math.sqrt(delta) 54 | if delta<=tol: break 55 | value_func_old = value_func_new 56 | return value_func_new, iteration 57 | 58 | 59 | def value_function_to_policy(env, gamma, value_function): 60 | """Output action numbers for each state in value_function. 61 | 62 | Parameters 63 | ---------- 64 | env: gym.core.Environment 65 | Environment to compute policy for. Must have nS, nA, and P as 66 | attributes. 67 | gamma: float 68 | Discount factor. Number in range [0, 1) 69 | value_function: np.ndarray 70 | Value of each state. 71 | 72 | Returns 73 | ------- 74 | np.ndarray 75 | An array of integers. Each integer is the optimal action to take 76 | in that state according to the environment dynamics and the 77 | given value function. 78 | """ 79 | policy=np.zeros(env.nS,dtype='int') 80 | for s in range(env.nS): 81 | maxvsa=-1 82 | maxa=-1 83 | for a in range(env.nA): 84 | vsa=0 85 | for possible_next_state in env.P[s][a]: 86 | prob_action = possible_next_state[0] 87 | cur_reward = possible_next_state[2] 88 | future_reward = gamma * value_function[possible_next_state[1]] 89 | vsa+=prob_action * (cur_reward + future_reward) 90 | if vsa>maxvsa: 91 | maxvsa=vsa 92 | maxa=a 93 | policy[s]=maxa 94 | 95 | return policy 96 | 97 | 98 | def improve_policy(env, gamma, value_func, policy): 99 | """Given a policy and value function improve the policy. 100 | 101 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 102 | book. 103 | 104 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 105 | 106 | Parameters 107 | ---------- 108 | env: gym.core.Environment 109 | The environment to compute value iteration for. Must have nS, 110 | nA, and P as attributes. 111 | gamma: float 112 | Discount factor, must be in range [0, 1) 113 | value_func: np.ndarray 114 | Value function for the given policy. 115 | policy: dict or np.array 116 | The policy to improve. Maps states to actions. 117 | max_iterations: int 118 | The maximum number of iterations to run before stopping. 119 | tol: float 120 | Determines when value function has converged. 121 | 122 | Returns 123 | ------- 124 | bool, np.ndarray 125 | Returns true if policy changed. Also returns the new policy. 
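In the implementation below the boolean is actually True when the policy is already stable, i.e. no state changed its greedy action during the sweep; policy_iteration loops while this flag is False. The greedy action for each state s is the argmax over a of the one-step backup sum over s' of P(s'|s,a) * (R(s,a,s') + gamma * value_func(s')), read directly from env.P.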
126 | """ 127 | stable=True 128 | for s in range(env.nS): 129 | old_action=policy[s] 130 | maxvsa=-1 131 | maxa=-1 132 | for a in range(env.nA): 133 | vsa=0 134 | for possible_next_state in env.P[s][a]: 135 | prob_action = possible_next_state[0] 136 | cur_reward = possible_next_state[2] 137 | future_reward = gamma * value_func[possible_next_state[1]] 138 | vsa+=prob_action * (cur_reward + future_reward) 139 | if vsa>maxvsa: 140 | maxvsa=vsa 141 | maxa=a 142 | if maxa!=old_action: stable=False 143 | policy[s]=maxa 144 | return stable, policy 145 | 146 | 147 | def policy_iteration(env, gamma, max_iterations=int(1e3), tol=1e-3): 148 | """Runs policy iteration. 149 | 150 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 151 | book. 152 | 153 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 154 | 155 | You should use the improve_policy and evaluate_policy methods to 156 | implement this method. 157 | 158 | Parameters 159 | ---------- 160 | env: gym.core.Environment 161 | The environment to compute value iteration for. Must have nS, 162 | nA, and P as attributes. 163 | gamma: float 164 | Discount factor, must be in range [0, 1) 165 | max_iterations: int 166 | The maximum number of iterations to run before stopping. 167 | tol: float 168 | Determines when value function has converged. 169 | 170 | Returns 171 | ------- 172 | (np.ndarray, np.ndarray, int, int) 173 | Returns optimal policy, value function, number of policy 174 | improvement iterations, and number of value iterations. 175 | """ 176 | policy = np.zeros(env.nS, dtype='int') 177 | value_func = np.zeros(env.nS) 178 | stable=False 179 | iters=0 180 | eval_iters=0 181 | while not stable: 182 | value_func,iter=evaluate_policy(env,gamma,policy) 183 | eval_iters+=iter 184 | stable,policy=improve_policy(env,gamma,value_func,policy) 185 | iters+=1 186 | return policy, value_func, iters, eval_iters 187 | 188 | 189 | def value_iteration(env, gamma, max_iterations=int(1e3), tol=1e-3): 190 | """Runs value iteration for a given gamma and environment. 191 | 192 | See page 90 (pg 108 pdf) of the Sutton and Barto Second Edition 193 | book. 194 | 195 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 196 | 197 | Parameters 198 | ---------- 199 | env: gym.core.Environment 200 | The environment to compute value iteration for. Must have nS, 201 | nA, and P as attributes. 202 | gamma: float 203 | Discount factor, must be in range [0, 1) 204 | max_iterations: int 205 | The maximum number of iterations to run before stopping. 206 | tol: float 207 | Determines when value function has converged. 208 | 209 | Returns 210 | ------- 211 | np.ndarray, iteration 212 | The value function and the number of iterations it took to converge. 
213 | """ 214 | value_func_old = np.random.rand(env.nS) 215 | value_func_new = np.zeros(env.nS) 216 | for iteration in range(max_iterations): 217 | delta=0 218 | for s in range(env.nS): 219 | maxvsa = -1 220 | for a in range(env.nA): 221 | vsa=0 222 | for possible_next_state in env.P[s][a]: 223 | prob_action = possible_next_state[0] 224 | cur_reward=possible_next_state[2] 225 | if possible_next_state[3]: 226 | future_reward=0 227 | else: future_reward=gamma*value_func_old[possible_next_state[1]] 228 | vsa+=prob_action*(cur_reward+future_reward) 229 | if vsa>maxvsa: 230 | maxvsa=vsa 231 | #diff=math.pow((value_func_old[s]-maxvsa),2) 232 | diff=abs(value_func_old[s]-maxvsa) 233 | delta=max(delta,diff) 234 | value_func_new[s]=maxvsa 235 | #delta=math.sqrt(delta) 236 | if delta<=tol: break 237 | value_func_old = value_func_new 238 | 239 | return value_func_new, iteration 240 | 241 | 242 | def print_policy(policy, action_names): 243 | """Print the policy in human-readable format. 244 | 245 | Parameters 246 | ---------- 247 | policy: np.ndarray 248 | Array of state to action number mappings 249 | action_names: dict 250 | Mapping of action numbers to characters representing the action. 251 | """ 252 | str_policy = policy.astype('str') 253 | for action_num, action_name in action_names.items(): 254 | np.place(str_policy, policy == action_num, action_name) 255 | 256 | print(str_policy) 257 | return str_policy 258 | -------------------------------------------------------------------------------- /deeprl_hw1/rlvaliterchngd.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import division, absolute_import 3 | from __future__ import print_function, unicode_literals 4 | 5 | import numpy as np 6 | import math 7 | 8 | def evaluate_policy(env, gamma, policy, max_iterations=int(1e3), tol=1e-3): 9 | """Evaluate the value of a policy. 10 | 11 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 12 | book. 13 | 14 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 15 | 16 | Parameters 17 | ---------- 18 | env: gym.core.Environment 19 | The environment to compute value iteration for. Must have nS, 20 | nA, and P as attributes. 21 | gamma: float 22 | Discount factor, must be in range [0, 1) 23 | policy: np.array 24 | The policy to evaluate. Maps states to actions. 25 | max_iterations: int 26 | The maximum number of iterations to run before stopping. 27 | tol: float 28 | Determines when value function has converged. 29 | 30 | Returns 31 | ------- 32 | np.ndarray 33 | The value for the given policy 34 | """ 35 | value_func_old = np.random.rand(env.nS) 36 | value_func_new = np.zeros(env.nS) 37 | for iteration in range(max_iterations): 38 | delta=0 39 | for s in range(env.nS): 40 | vs=0 41 | actions=[policy[s]] 42 | #if len(actions)==1: actions=[actions] 43 | for a in actions: 44 | for possible_next_state in env.P[s][a]: 45 | prob_action = possible_next_state[0] 46 | cur_reward=possible_next_state[2] 47 | future_reward=gamma*value_func_old[possible_next_state[1]] 48 | vs+=prob_action*(cur_reward+future_reward) 49 | #if env.P[s][a][3]:break 50 | diff=abs(value_func_old[s]-vs) 51 | delta=max(delta,diff) 52 | value_func_new[s]=vs 53 | #delta=math.sqrt(delta) 54 | if delta<=tol: break 55 | value_func_old = value_func_new 56 | return value_func_new, iteration 57 | 58 | 59 | def value_function_to_policy(env, gamma, value_function): 60 | """Output action numbers for each state in value_function. 
61 | 62 | Parameters 63 | ---------- 64 | env: gym.core.Environment 65 | Environment to compute policy for. Must have nS, nA, and P as 66 | attributes. 67 | gamma: float 68 | Discount factor. Number in range [0, 1) 69 | value_function: np.ndarray 70 | Value of each state. 71 | 72 | Returns 73 | ------- 74 | np.ndarray 75 | An array of integers. Each integer is the optimal action to take 76 | in that state according to the environment dynamics and the 77 | given value function. 78 | """ 79 | policy=np.zeros(env.nS,dtype='int') 80 | for s in range(env.nS): 81 | maxvsa=-1 82 | maxa=-1 83 | for a in range(env.nA): 84 | vsa=0 85 | for possible_next_state in env.P[s][a]: 86 | prob_action = possible_next_state[0] 87 | cur_reward = possible_next_state[2] 88 | future_reward = gamma * value_function[possible_next_state[1]] 89 | vsa+=prob_action * (cur_reward + future_reward) 90 | if vsa>maxvsa: 91 | maxvsa=vsa 92 | maxa=a 93 | policy[s]=maxa 94 | 95 | return policy 96 | 97 | 98 | def improve_policy(env, gamma, value_func, policy): 99 | """Given a policy and value function improve the policy. 100 | 101 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 102 | book. 103 | 104 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 105 | 106 | Parameters 107 | ---------- 108 | env: gym.core.Environment 109 | The environment to compute value iteration for. Must have nS, 110 | nA, and P as attributes. 111 | gamma: float 112 | Discount factor, must be in range [0, 1) 113 | value_func: np.ndarray 114 | Value function for the given policy. 115 | policy: dict or np.array 116 | The policy to improve. Maps states to actions. 117 | max_iterations: int 118 | The maximum number of iterations to run before stopping. 119 | tol: float 120 | Determines when value function has converged. 121 | 122 | Returns 123 | ------- 124 | bool, np.ndarray 125 | Returns true if policy changed. Also returns the new policy. 126 | """ 127 | stable=True 128 | for s in range(env.nS): 129 | old_action=policy[s] 130 | maxvsa=-1 131 | maxa=-1 132 | for a in range(env.nA): 133 | vsa=0 134 | for possible_next_state in env.P[s][a]: 135 | prob_action = possible_next_state[0] 136 | cur_reward = possible_next_state[2] 137 | future_reward = gamma * value_func[possible_next_state[1]] 138 | vsa+=prob_action * (cur_reward + future_reward) 139 | if vsa>maxvsa: 140 | maxvsa=vsa 141 | maxa=a 142 | if maxa!=old_action: stable=False 143 | policy[s]=maxa 144 | return stable, policy 145 | 146 | 147 | def policy_iteration(env, gamma, max_iterations=int(1e3), tol=1e-3): 148 | """Runs policy iteration. 149 | 150 | See page 87 (pg 105 pdf) of the Sutton and Barto Second Edition 151 | book. 152 | 153 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 154 | 155 | You should use the improve_policy and evaluate_policy methods to 156 | implement this method. 157 | 158 | Parameters 159 | ---------- 160 | env: gym.core.Environment 161 | The environment to compute value iteration for. Must have nS, 162 | nA, and P as attributes. 163 | gamma: float 164 | Discount factor, must be in range [0, 1) 165 | max_iterations: int 166 | The maximum number of iterations to run before stopping. 167 | tol: float 168 | Determines when value function has converged. 169 | 170 | Returns 171 | ------- 172 | (np.ndarray, np.ndarray, int, int) 173 | Returns optimal policy, value function, number of policy 174 | improvement iterations, and number of value iterations. 
175 | """ 176 | policy = np.zeros(env.nS, dtype='int') 177 | value_func = np.zeros(env.nS) 178 | stable=False 179 | iters=0 180 | eval_iters=0 181 | while not stable: 182 | value_func,iter=evaluate_policy(env,gamma,policy) 183 | eval_iters+=iter 184 | stable,policy=improve_policy(env,gamma,value_func,policy) 185 | iters+=1 186 | return policy, value_func, iters, eval_iters 187 | 188 | 189 | def value_iteration(env, gamma, max_iterations=int(1e3), tol=1e-3): 190 | """Runs value iteration for a given gamma and environment. 191 | 192 | See page 90 (pg 108 pdf) of the Sutton and Barto Second Edition 193 | book. 194 | 195 | http://webdocs.cs.ualberta.ca/~sutton/book/bookdraft2016sep.pdf 196 | 197 | Parameters 198 | ---------- 199 | env: gym.core.Environment 200 | The environment to compute value iteration for. Must have nS, 201 | nA, and P as attributes. 202 | gamma: float 203 | Discount factor, must be in range [0, 1) 204 | max_iterations: int 205 | The maximum number of iterations to run before stopping. 206 | tol: float 207 | Determines when value function has converged. 208 | 209 | Returns 210 | ------- 211 | np.ndarray, iteration 212 | The value function and the number of iterations it took to converge. 213 | """ 214 | value_func_old = np.random.rand(env.nS) 215 | value_func_new = np.zeros(env.nS) 216 | for iteration in range(max_iterations): 217 | delta=0 218 | for s in range(env.nS): 219 | maxvsa = -1 220 | for a in range(env.nA): 221 | vsa=0 222 | for possible_next_state in env.P[s][a]: 223 | prob_action = possible_next_state[0] 224 | cur_reward=possible_next_state[2] 225 | if value_func_new[possible_next_state[1]]==0: 226 | future_reward=gamma*value_func_old[possible_next_state[1]] 227 | else: 228 | future_reward = gamma * value_func_new[possible_next_state[1]] 229 | vsa+=prob_action*(cur_reward+future_reward) 230 | if vsa>maxvsa: 231 | maxvsa=vsa 232 | #diff=math.pow((value_func_old[s]-maxvsa),2) 233 | diff=abs(value_func_old[s]-maxvsa) 234 | delta=max(delta,diff) 235 | value_func_new[s]=maxvsa 236 | #delta=math.sqrt(delta) 237 | if delta<=tol: break 238 | value_func_old = value_func_new 239 | 240 | return value_func_new, iteration 241 | 242 | 243 | def print_policy(policy, action_names): 244 | """Print the policy in human-readable format. 245 | 246 | Parameters 247 | ---------- 248 | policy: np.ndarray 249 | Array of state to action number mappings 250 | action_names: dict 251 | Mapping of action numbers to characters representing the action. 
252 | """ 253 | str_policy = policy.astype('str') 254 | for action_num, action_name in action_names.items(): 255 | np.place(str_policy, policy == action_num, action_name) 256 | 257 | print(str_policy) 258 | return str_policy 259 | --------------------------------------------------------------------------------
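
The transition helper at the top of queue_envs.py combines three independent Bernoulli arrival processes: after the chosen action is applied, queue i gains a customer with probability p_i unless it already holds 5 customers, and itertools.product([0, 1], repeat=3) enumerates the eight arrival patterns. The standalone sketch below re-derives that arrival distribution; the function name build_arrival_distribution and the MAX_LEN constant are illustrative assumptions, and it merges colliding outcomes by summing their probabilities rather than deduplicating and renormalizing as the method above does (both yield the same proper distribution).

import itertools

MAX_LEN = 5  # assumed capacity, matching the ">= 5" blocking checks above

def build_arrival_distribution(queue_lengths, probs):
    """Map next queue-length tuples to their probabilities (sketch only)."""
    dist = {}
    for arrivals in itertools.product([0, 1], repeat=len(queue_lengths)):
        prob = 1.0
        next_lengths = list(queue_lengths)
        for i, (arrived, p) in enumerate(zip(arrivals, probs)):
            prob *= p if arrived else (1.0 - p)
            if arrived and next_lengths[i] < MAX_LEN:  # full queues drop arrivals
                next_lengths[i] += 1
        key = tuple(next_lengths)
        dist[key] = dist.get(key, 0.0) + prob  # merge colliding outcomes
    return dist

if __name__ == '__main__':
    d = build_arrival_distribution([5, 2, 0], [0.1, 0.9, 0.1])
    print(sorted(d.items()), sum(d.values()))  # probabilities sum to 1.0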
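
All of the solvers in rl1.py assume the classic gym DiscreteEnv interface (nS, nA, and the P[s][a] transition table). A minimal driver, in the spirit of example.py and driver3.py, could look like the sketch below. It assumes that importing deeprl_hw1.lake_envs registers the custom FrozenLake variants (the id Stochastic-4x4-FrozenLake-v0 appears in the results filenames), and the action_names mapping is a display-only assumption rather than the one actually defined in lake_envs.py.

import gym
import deeprl_hw1.lake_envs  # assumed side effect: registers the custom FrozenLake variants
from deeprl_hw1 import rl1

# Depending on the gym version, a TimeLimit wrapper may hide nS/nA/P;
# env.unwrapped exposes the underlying discrete environment if so.
env = gym.make('Stochastic-4x4-FrozenLake-v0')
gamma = 0.9

policy, value_func, improve_steps, eval_sweeps = rl1.policy_iteration(env, gamma)
print('policy improvement steps:', improve_steps, 'evaluation sweeps:', eval_sweeps)

vi_values, vi_sweeps = rl1.value_iteration(env, gamma)
vi_policy = rl1.value_function_to_policy(env, gamma, vi_values)
print('value iteration sweeps:', vi_sweeps)

action_names = {0: 'L', 1: 'D', 2: 'R', 3: 'U'}  # assumed action ordering, for display only
rl1.print_policy(policy, action_names)
rl1.print_policy(vi_policy, action_names)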
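
rl1.py and rlvaliterchngd.py differ only in how value_iteration computes the bootstrap term: rl1.py zeroes the future reward on terminal transitions and always reads from the previous sweep (value_func_old), while the changed variant reuses a value already written in the current sweep whenever that entry is non-zero, which is essentially an in-place (Gauss-Seidel-style) update. The toy comparison below makes that distinction concrete on a hand-built two-state P table in the same (prob, next_state, reward, terminal) tuple format; the MDP and the sweep helper are invented for this sketch, and it shows the plain in-place update rather than the non-zero fallback used above.

import numpy as np

# Toy MDP in the same P[s][a] = [(prob, next_state, reward, done), ...] format;
# the numbers are made up purely for illustration.
P = {
    0: {0: [(1.0, 0, 1.0, False)], 1: [(1.0, 1, 0.0, False)]},
    1: {0: [(1.0, 0, 0.0, False)], 1: [(1.0, 1, 0.0, False)]},
}
nS, nA, gamma = 2, 2, 0.9

def sweep(values, in_place):
    """One Bellman-optimality sweep; in_place=True mimics the changed variant."""
    new_values = values.copy()
    source = new_values if in_place else values
    for s in range(nS):
        new_values[s] = max(
            sum(p * (r + gamma * source[ns]) for p, ns, r, _ in P[s][a])
            for a in range(nA))
    return new_values

print('synchronous sweep:', sweep(np.zeros(nS), in_place=False))  # [1.0, 0.0]
print('in-place sweep   :', sweep(np.zeros(nS), in_place=True))   # [1.0, 0.9]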