├── .gitignore ├── README.md ├── lab1 ├── LICENSE ├── Lab 1 - Problem 1.ipynb ├── Lab 1 - Problem 2.ipynb ├── Lab 1 - Problem 3.ipynb ├── crawler_env.py ├── discrete_env.py ├── environment.yml ├── frozen_lake.py ├── lab1.md └── misc.py ├── lab2 ├── LICENSE ├── Lab 2.ipynb ├── environment.yml └── lab2.md ├── lab3 ├── LICENSE ├── docker_run.sh ├── docker_run_vnc.sh ├── environment.yml ├── findport.py ├── findport.sh ├── lab3.pdf ├── launch_bg_screen_buffer.sh ├── logger.py ├── scripts │ ├── setup_xquartz.sh │ └── test_environment_setup.py ├── simpledqn │ ├── __init__.py │ ├── gridworld_env.py │ ├── main.py │ ├── replay_buffer.py │ ├── replay_buffer_warm_start.pkl │ ├── simple_utils.py │ ├── weights_warm_start.pkl │ └── wrappers.py └── viskit │ ├── __init__.py │ ├── core.py │ ├── frontend.py │ ├── static │ ├── css │ │ ├── bootstrap.min.css │ │ └── dropdowns-enhancement.css │ └── js │ │ ├── bootstrap.min.js │ │ ├── dropdowns-enhancement.js │ │ ├── jquery-1.10.2.min.js │ │ ├── jquery.loadTemplate-1.5.6.js │ │ └── plotly-latest.min.js │ └── templates │ └── main.html ├── lab4 ├── LICENSE ├── a2c.py ├── alg_utils.py ├── algs.py ├── docker_run.sh ├── docker_run_vnc.sh ├── env_makers.py ├── environment.yml ├── experiments │ ├── run_a2c_breakout.py │ ├── run_a2c_pong.py │ ├── run_a2c_pong_warm_start.py │ ├── run_pg_cartpole.py │ ├── run_trpo_cartpole.py │ ├── run_trpo_half_cheetah.py │ └── run_trpo_pendulum.py ├── findport.py ├── lab4.pdf ├── launch_bg_screen_buffer.sh ├── logger.py ├── models.py ├── pg.py ├── pong_warm_start.pkl ├── scripts │ ├── resume_training.py │ ├── setup_xquartz.sh │ ├── sim_policy.py │ └── test_environment_setup.py ├── simplepg │ ├── main.py │ ├── point_env.py │ ├── rollout.py │ └── simple_utils.py ├── tests │ ├── a2c_tests.py │ ├── pg_tests.py │ ├── simplepg_tests.py │ └── trpo_tests.py ├── trpo.py ├── utils.py └── viskit │ ├── __init__.py │ ├── core.py │ ├── frontend.py │ ├── static │ ├── css │ │ ├── bootstrap.min.css │ │ └── dropdowns-enhancement.css │ └── js │ │ ├── bootstrap.min.js │ │ ├── dropdowns-enhancement.js │ │ ├── jquery-1.10.2.min.js │ │ ├── jquery.loadTemplate-1.5.6.js │ │ └── plotly-latest.min.js │ └── templates │ └── main.html └── prelab ├── docker_run.sh ├── docker_run_vnc.sh ├── environment.yml ├── findport.py ├── launch_bg_screen_buffer.sh ├── prelab.pdf ├── scripts ├── setup_xquartz.sh └── test_environment_setup.py └── simplepg ├── point_env.py ├── rollout.py └── simple_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Solutions to the [Deep RL Bootcamp](https://sites.google.com/view/deep-rl-bootcamp) labs 2 | 3 | * [Prelab](prelab): Set up your computer for all labs. 4 | * [Lab 1](lab1): Markov Decision Processes. You will implement value iteration, policy iteration, and tabular Q-learning and apply these algorithms to simple environments including tabular maze navigation (FrozenLake) and controlling a simple crawler robot. 5 | * [Lab 2](lab2): Introduction to Chainer. You will implement deep supervised learning using Chainer, and apply it to the MNIST dataset. 6 | * [Lab 3](lab3): Deep Q-Learning. You will implement the DQN algorithm and apply it to Atari games. 7 | * [Lab 4](lab4): Policy Optimization Algorithms. 
You will implement various policy optimization algorithms, including policy gradient, natural policy gradient, trust-region policy optimization (TRPO), and asynchronous advantage actor-critic (A3C). You will apply these algorithms to classic control tasks, Atari games, and roboschool locomotion environments. -------------------------------------------------------------------------------- /lab1/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Deep RL Bootcamp Organizers. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lab1/discrete_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Peter Chen, Rocky Duan, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | import numpy as np 16 | 17 | from gym import Env, spaces 18 | from gym.utils import seeding 19 | 20 | def categorical_sample(prob_n, np_random): 21 | """ 22 | Sample from categorical distribution 23 | Each row specifies class probabilities 24 | """ 25 | prob_n = np.asarray(prob_n) 26 | csprob_n = np.cumsum(prob_n) 27 | return (csprob_n > np_random.rand()).argmax() 28 | 29 | 30 | class DiscreteEnv(Env): 31 | 32 | """ 33 | Has the following members 34 | - nS: number of states 35 | - nA: number of actions 36 | - P: transitions (*) 37 | - isd: initial state distribution (**) 38 | 39 | (*) dictionary dict of dicts of lists, where 40 | P[s][a] == [(probability, nextstate, reward, done), ...] 41 | (**) list or array of length nS 42 | 43 | 44 | """ 45 | def __init__(self, nS, nA, P, isd): 46 | self.P = P 47 | self.isd = isd 48 | self.lastaction=None # for rendering 49 | self.nS = nS 50 | self.nA = nA 51 | 52 | self.action_space = spaces.Discrete(self.nA) 53 | self.observation_space = spaces.Discrete(self.nS) 54 | 55 | self._seed() 56 | self._reset() 57 | 58 | def _seed(self, seed=None): 59 | self.np_random, seed = seeding.np_random(seed) 60 | return [seed] 61 | 62 | def _reset(self): 63 | self.s = categorical_sample(self.isd, self.np_random) 64 | self.lastaction=None 65 | return self.s 66 | 67 | def _step(self, a): 68 | transitions = self.P[self.s][a] 69 | i = categorical_sample([t[0] for t in transitions], self.np_random) 70 | p, s, r, d= transitions[i] 71 | self.s = s 72 | self.lastaction=a 73 | return (s, r, d, {"prob" : p}) 74 | -------------------------------------------------------------------------------- /lab1/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - notebook 11 | - pip: 12 | - gym==0.9.2 13 | - chainer==2.0.1 14 | - ipdb==0.10.3 15 | - tblib==1.3.2 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | - matplotlib 33 | -------------------------------------------------------------------------------- /lab1/frozen_lake.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Peter Chen, Rocky Duan, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from Berkeley Deep RL Class [HW2](https://github.com/berkeleydeeprlcourse/homework/blob/c1027d83cd542e67ebed982d44666e0d22a00141/hw2/HW2.ipynb) [(license)](https://github.com/berkeleydeeprlcourse/homework/blob/master/LICENSE). 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import sys 19 | from six import StringIO, b 20 | 21 | from gym import utils 22 | import discrete_env 23 | 24 | LEFT = 0 25 | DOWN = 1 26 | RIGHT = 2 27 | UP = 3 28 | 29 | MAPS = { 30 | "4x4": [ 31 | "SFFF", 32 | "FHFH", 33 | "FFFH", 34 | "HFFG" 35 | ], 36 | "8x8": [ 37 | "SFFFFFFF", 38 | "FFFFFFFF", 39 | "FFFHFFFF", 40 | "FFFFFHFF", 41 | "FFFHFFFF", 42 | "FHHFFFHF", 43 | "FHFFHFHF", 44 | "FFFHFFFG" 45 | ], 46 | } 47 | 48 | class FrozenLakeEnv(discrete_env.DiscreteEnv): 49 | """ 50 | Winter is here. You and your friends were tossing around a frisbee at the park 51 | when you made a wild throw that left the frisbee out in the middle of the lake. 52 | The water is mostly frozen, but there are a few holes where the ice has melted. 53 | If you step into one of those holes, you'll fall into the freezing water. 54 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 55 | you navigate across the lake and retrieve the disc. 56 | However, the ice is slippery, so you won't always move in the direction you intend. 57 | The surface is described using a grid like the following 58 | 59 | SFFF 60 | FHFH 61 | FFFH 62 | HFFG 63 | 64 | S : starting point, safe 65 | F : frozen surface, safe 66 | H : hole, fall to your doom 67 | G : goal, where the frisbee is located 68 | 69 | The episode ends when you reach the goal or fall in a hole. 70 | You receive a reward of 1 if you reach the goal, and zero otherwise. 
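    When is_slippery is True, the intended action is taken with probability 0.8
    and each of the two perpendicular directions with probability 0.1 (see the
    transition construction in __init__ below).

    Minimal usage sketch (illustrative only, not part of the original lab code;
    assumes the Gym 0.9.x API pinned in environment.yml):

        env = FrozenLakeEnv(map_name="4x4", is_slippery=True)
        s = env.reset()                      # integer state index in [0, nS)
        s, r, done, info = env.step(RIGHT)   # info["prob"] is the probability of the sampled transition
        model = env.P                        # P[s][a] = [(prob, next_state, reward, done), ...],
                                             # the tabular model used for value/policy iteration in Lab 1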
71 | 72 | """ 73 | 74 | metadata = {'render.modes': ['human', 'ansi']} 75 | 76 | def __init__(self, desc=None, map_name="4x4",is_slippery=True): 77 | if desc is None and map_name is None: 78 | raise ValueError('Must provide either desc or map_name') 79 | elif desc is None: 80 | desc = MAPS[map_name] 81 | self.desc = desc = np.asarray(desc,dtype='c') 82 | self.nrow, self.ncol = nrow, ncol = desc.shape 83 | 84 | nA = 4 85 | nS = nrow * ncol 86 | 87 | isd = np.array(desc == b'S').astype('float64').ravel() 88 | isd /= isd.sum() 89 | 90 | P = {s : {a : [] for a in range(nA)} for s in range(nS)} 91 | 92 | def to_s(row, col): 93 | return row*ncol + col 94 | def inc(row, col, a): 95 | if a==0: # left 96 | col = max(col-1,0) 97 | elif a==1: # down 98 | row = min(row+1,nrow-1) 99 | elif a==2: # right 100 | col = min(col+1,ncol-1) 101 | elif a==3: # up 102 | row = max(row-1,0) 103 | return (row, col) 104 | 105 | for row in range(nrow): 106 | for col in range(ncol): 107 | s = to_s(row, col) 108 | for a in range(4): 109 | li = P[s][a] 110 | letter = desc[row, col] 111 | if letter in b'GH': 112 | li.append((1.0, s, 0, True)) 113 | else: 114 | if is_slippery: 115 | for b in [(a-1)%4, a, (a+1)%4]: 116 | newrow, newcol = inc(row, col, b) 117 | newstate = to_s(newrow, newcol) 118 | newletter = desc[newrow, newcol] 119 | done = bytes(newletter) in b'GH' 120 | rew = float(newletter == b'G') 121 | li.append((0.8 if b==a else 0.1, newstate, rew, done)) 122 | else: 123 | newrow, newcol = inc(row, col, a) 124 | newstate = to_s(newrow, newcol) 125 | newletter = desc[newrow, newcol] 126 | done = bytes(newletter) in b'GH' 127 | rew = float(newletter == b'G') 128 | li.append((1.0, newstate, rew, done)) 129 | 130 | super(FrozenLakeEnv, self).__init__(nS, nA, P, isd) 131 | 132 | def _render(self, mode='human', close=False): 133 | if close: 134 | return 135 | outfile = StringIO() if mode == 'ansi' else sys.stdout 136 | 137 | row, col = self.s // self.ncol, self.s % self.ncol 138 | desc = self.desc.tolist() 139 | desc = [[c.decode('utf-8') for c in line] for line in desc] 140 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 141 | if self.lastaction is not None: 142 | outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction])) 143 | else: 144 | outfile.write("\n") 145 | outfile.write("\n".join(''.join(line) for line in desc)+"\n") 146 | 147 | return outfile 148 | -------------------------------------------------------------------------------- /lab1/lab1.md: -------------------------------------------------------------------------------- 1 | * Activate the conda environment by running 2 | ``` 3 | source activate deeprlbootcamp 4 | ``` 5 | * Launch IPython Notebook from this directory; this should open up a browser window where you can click to open Lab1. 6 | ``` 7 | jupyter notebook 8 | ``` 9 | * After opening a lab file, click “File - Trust Notebook” 10 | * If you have never used IPython Notebook before, skim this quick tutorial here: http://cs231n.github.io/ipython-tutorial/ 11 | -------------------------------------------------------------------------------- /lab1/misc.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Peter Chen, Rocky Duan, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 
3 | 4 | Code adapted from Berkeley Deep RL Class [HW2](https://github.com/berkeleydeeprlcourse/homework/blob/c1027d83cd542e67ebed982d44666e0d22a00141/hw2/HW2.ipynb) [(license)](https://github.com/berkeleydeeprlcourse/homework/blob/master/LICENSE). 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import sys 19 | from six import StringIO, b 20 | 21 | from gym import utils 22 | import discrete_env 23 | 24 | LEFT = 0 25 | DOWN = 1 26 | RIGHT = 2 27 | UP = 3 28 | 29 | MAPS = { 30 | "4x4": [ 31 | "SFFF", 32 | "FHFH", 33 | "FFFH", 34 | "HFFG" 35 | ], 36 | "8x8": [ 37 | "SFFFFFFF", 38 | "FFFFFFFF", 39 | "FFFHFFFF", 40 | "FFFFFHFF", 41 | "FFFHFFFF", 42 | "FHHFFFHF", 43 | "FHFFHFHF", 44 | "FFFHFFFG" 45 | ], 46 | } 47 | 48 | class FrozenLakeEnv(discrete_env.DiscreteEnv): 49 | """ 50 | Winter is here. You and your friends were tossing around a frisbee at the park 51 | when you made a wild throw that left the frisbee out in the middle of the lake. 52 | The water is mostly frozen, but there are a few holes where the ice has melted. 53 | If you step into one of those holes, you'll fall into the freezing water. 54 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 55 | you navigate across the lake and retrieve the disc. 56 | However, the ice is slippery, so you won't always move in the direction you intend. 57 | The surface is described using a grid like the following 58 | 59 | SFFF 60 | FHFH 61 | FFFH 62 | HFFG 63 | 64 | S : starting point, safe 65 | F : frozen surface, safe 66 | H : hole, fall to your doom 67 | G : goal, where the frisbee is located 68 | 69 | The episode ends when you reach the goal or fall in a hole. 70 | You receive a reward of 1 if you reach the goal, and zero otherwise. 
71 | 72 | """ 73 | 74 | metadata = {'render.modes': ['human', 'ansi']} 75 | 76 | def __init__(self, desc=None, map_name="4x4",is_slippery=True): 77 | if desc is None and map_name is None: 78 | raise ValueError('Must provide either desc or map_name') 79 | elif desc is None: 80 | desc = MAPS[map_name] 81 | self.desc = desc = np.asarray(desc,dtype='c') 82 | self.nrow, self.ncol = nrow, ncol = desc.shape 83 | 84 | nA = 4 85 | nS = nrow * ncol 86 | 87 | isd = np.array(desc == b'S').astype('float64').ravel() 88 | isd /= isd.sum() 89 | 90 | P = {s : {a : [] for a in range(nA)} for s in range(nS)} 91 | 92 | def to_s(row, col): 93 | return row*ncol + col 94 | def inc(row, col, a): 95 | if a==0: # left 96 | col = max(col-1,0) 97 | elif a==1: # down 98 | row = min(row+1,nrow-1) 99 | elif a==2: # right 100 | col = min(col+1,ncol-1) 101 | elif a==3: # up 102 | row = max(row-1,0) 103 | return (row, col) 104 | 105 | for row in range(nrow): 106 | for col in range(ncol): 107 | s = to_s(row, col) 108 | for a in range(4): 109 | li = P[s][a] 110 | letter = desc[row, col] 111 | if letter in b'GH': 112 | li.append((1.0, s, 0, True)) 113 | else: 114 | if is_slippery: 115 | for b in [(a-1)%4, a, (a+1)%4]: 116 | newrow, newcol = inc(row, col, b) 117 | newstate = to_s(newrow, newcol) 118 | newletter = desc[newrow, newcol] 119 | done = bytes(newletter) in b'GH' 120 | rew = float(newletter == b'G') 121 | li.append((0.8 if b==a else 0.1, newstate, rew, done)) 122 | else: 123 | newrow, newcol = inc(row, col, a) 124 | newstate = to_s(newrow, newcol) 125 | newletter = desc[newrow, newcol] 126 | done = bytes(newletter) in b'GH' 127 | rew = float(newletter == b'G') 128 | li.append((1.0, newstate, rew, done)) 129 | 130 | super(FrozenLakeEnv, self).__init__(nS, nA, P, isd) 131 | 132 | def _render(self, mode='human', close=False): 133 | if close: 134 | return 135 | outfile = StringIO() if mode == 'ansi' else sys.stdout 136 | 137 | row, col = self.s // self.ncol, self.s % self.ncol 138 | desc = self.desc.tolist() 139 | desc = [[c.decode('utf-8') for c in line] for line in desc] 140 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 141 | if self.lastaction is not None: 142 | outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction])) 143 | else: 144 | outfile.write("\n") 145 | outfile.write("\n".join(''.join(line) for line in desc)+"\n") 146 | 147 | return outfile 148 | 149 | def make_grader(expected): 150 | boxed_i = [0] 151 | boxed_err = [False] 152 | expected_lines = expected.split("\n") 153 | def checking_print(line): 154 | if boxed_i[0] < len(expected_lines): 155 | expected_line = expected_lines[boxed_i[0]] 156 | else: 157 | expected_line = "[END]" 158 | if expected_line == line: 159 | print(line) 160 | else: 161 | boxed_err[0] = True 162 | print("\x1b[41m", end="") 163 | print(line, end="") 164 | print("\x1b[0m", end="") 165 | print(" *** Expected: \x1b[42m" + expected_line + "\x1b[0m") 166 | boxed_i[0] += 1 167 | if boxed_i[0] == len(expected_lines): 168 | print("Test failed" if boxed_err[0] else "Test succeeded") 169 | return checking_print 170 | -------------------------------------------------------------------------------- /lab2/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Deep RL Bootcamp Organizers. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lab2/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - notebook 11 | - pip: 12 | - gym==0.9.2 13 | - chainer==2.0.1 14 | - ipdb==0.10.3 15 | - tblib==1.3.2 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | - matplotlib 33 | -------------------------------------------------------------------------------- /lab2/lab2.md: -------------------------------------------------------------------------------- 1 | * Activate the conda environment by running 2 | ``` 3 | source activate deeprlbootcamp 4 | ``` 5 | * Launch IPython Notebook from this directory; this should open up a browser window where you can click to open Lab2. 6 | ``` 7 | jupyter notebook 8 | ``` 9 | * After opening a lab file, click “File - Trust Notebook” 10 | * If you have never used IPython Notebook before, skim this quick tutorial here: http://cs231n.github.io/ipython-tutorial/ 11 | -------------------------------------------------------------------------------- /lab3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Deep RL Bootcamp Organizers. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lab3/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /lab3/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.sh" 3000 1) 4 | viskit_port=$("$DIR/findport.sh" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /lab3/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /lab3/findport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python 2 | # 3 | # Usage: findport.py 3000 100 4 | # 5 | 6 | """ 7 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 8 | 9 | Copyright 2017 Deep RL Bootcamp Organizers. 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | 17 | """ 18 | 19 | 20 | from __future__ import print_function 21 | import socket 22 | from contextlib import closing 23 | import sys 24 | 25 | if len(sys.argv) != 3: 26 | print("Usage: {} ".format(sys.argv[0])) 27 | sys.exit(1) 28 | 29 | base = int(sys.argv[1]) 30 | increment = int(sys.argv[2]) 31 | 32 | 33 | def find_free_port(): 34 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 35 | for port in range(base, 65536, increment): 36 | try: 37 | s.bind(('', port)) 38 | return s.getsockname()[1] 39 | except socket.error: 40 | continue 41 | 42 | 43 | print(find_free_port()) 44 | -------------------------------------------------------------------------------- /lab3/findport.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Please run as root. 4 | # Usage: bash findport.sh 3000 100 5 | # 6 | 7 | 8 | if [[ -z "$1" || -z "$2" ]]; then 9 | echo "Usage: $0 " 10 | exit 1 11 | fi 12 | 13 | 14 | BASE=$1 15 | INCREMENT=$2 16 | 17 | port=$BASE 18 | isfree=$(netstat -aln | grep $port) 19 | 20 | while [[ -n "$isfree" ]]; do 21 | port=$[port+INCREMENT] 22 | isfree=$(netstat -aln | grep $port) 23 | done 24 | 25 | echo "$port" 26 | exit 0 27 | -------------------------------------------------------------------------------- /lab3/lab3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/lab3.pdf -------------------------------------------------------------------------------- /lab3/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 
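# Xvfb above provides a virtual framebuffer on display :99 so environments can
# render headlessly inside the container; x11vnc (started below) shares that
# display on port 5900, which docker_run_vnc.sh maps to a free host port and
# protects with the VNC password 3284.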
14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /lab3/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /lab3/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | def main(): 18 | import roboschool 19 | import gym 20 | import chainer 21 | env = gym.make('CartPole-v0') 22 | env.reset() 23 | env.step(env.action_space.sample()) 24 | env = gym.make('RoboschoolHalfCheetah-v1') 25 | env.reset() 26 | env.step(env.action_space.sample()) 27 | print("Your environment has been successfully set up!") 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /lab3/simpledqn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/simpledqn/__init__.py -------------------------------------------------------------------------------- /lab3/simpledqn/gridworld_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import sys 19 | from six import StringIO, b 20 | 21 | from gym import utils 22 | from gym.envs.toy_text import discrete 23 | from gym.envs.registration import register 24 | 25 | LEFT = 0 26 | DOWN = 1 27 | RIGHT = 2 28 | UP = 3 29 | 30 | MAPS = { 31 | "4x4": [ 32 | "SFFF", 33 | "FFFH", 34 | "FFFF", 35 | "HFFG" 36 | ], 37 | "8x8": [ 38 | "SFFFFFFF", 39 | "FFFFFFFF", 40 | "FFFHFFFF", 41 | "FFFFFHFF", 42 | "FFFHFFFF", 43 | "FHHFFFHF", 44 | "FHFFHFHF", 45 | "FFFHFFFG" 46 | ], 47 | "9x9": [ 48 | "HFFFFFFFH", 49 | "FFFFFFFFF", 50 | "FFFFFFFFF", 51 | "FFFFFFFFF", 52 | "FFFFSFFFF", 53 | "FFFFFFFFF", 54 | "FFFFFFFFF", 55 | "FFFFFFFFF", 56 | "HFFFFFFFH" 57 | ] 58 | } 59 | 60 | 61 | def to_one_hot(x, len): 62 | one_hot = np.zeros(len) 63 | one_hot[x] = 1 64 | return one_hot 65 | 66 | 67 | class GridWorld(discrete.DiscreteEnv): 68 | """ 69 | Winter is here. 
You and your friends were tossing around a frisbee at the park 70 | when you made a wild throw that left the frisbee out in the middle of the lake. 71 | The water is mostly frozen, but there are a few holes where the ice has melted. 72 | If you step into one of those holes, you'll fall into the freezing water. 73 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 74 | you navigate across the lake and retrieve the disc. 75 | However, the ice is slippery, so you won't always move in the direction you intend. 76 | The surface is described using a grid like the following 77 | 78 | SFFF 79 | FHFH 80 | FFFH 81 | HFFG 82 | 83 | S : starting point, safe 84 | F : frozen surface, safe 85 | H : hole, fall to your doom 86 | G : goal, where the frisbee is located 87 | 88 | The episode ends when you reach the goal or fall in a hole. 89 | You receive a reward of 1 if you reach the goal, and zero otherwise. 90 | 91 | """ 92 | 93 | metadata = {'render.modes': ['human', 'ansi']} 94 | 95 | def __init__(self, desc=None, map_name="4x4", is_slippery=False): 96 | if desc is None and map_name is None: 97 | raise ValueError('Must provide either desc or map_name') 98 | elif desc is None: 99 | desc = MAPS[map_name] 100 | self.desc = desc = np.asarray(desc, dtype='c') 101 | self.nrow, self.ncol = nrow, ncol = desc.shape 102 | 103 | nA = 4 104 | nS = nrow * ncol 105 | 106 | isd = np.array(desc == b'S').astype('float64').ravel() 107 | isd /= isd.sum() 108 | 109 | P = {s: {a: [] for a in range(nA)} for s in range(nS)} 110 | 111 | def to_s(row, col): 112 | return row * ncol + col 113 | 114 | def inc(row, col, a): 115 | if a == 0: # left 116 | col = max(col - 1, 0) 117 | elif a == 1: # down 118 | row = min(row + 1, nrow - 1) 119 | elif a == 2: # right 120 | col = min(col + 1, ncol - 1) 121 | elif a == 3: # up 122 | row = max(row - 1, 0) 123 | return (row, col) 124 | 125 | for row in range(nrow): 126 | for col in range(ncol): 127 | s = to_s(row, col) 128 | for a in range(4): 129 | li = P[s][a] 130 | letter = desc[row, col] 131 | if letter in b'GH': 132 | li.append((1.0, s, 0, True)) 133 | else: 134 | if is_slippery: 135 | for b in [(a - 1) % 4, a, (a + 1) % 4]: 136 | newrow, newcol = inc(row, col, b) 137 | newstate = to_s(newrow, newcol) 138 | newletter = desc[newrow, newcol] 139 | done = bytes(newletter) in b'GH' 140 | if newletter == b'G': 141 | rew = 1.0 142 | elif newletter == b'H': 143 | rew = .0 144 | else: 145 | rew = 0. 146 | # rew = float(newletter == b'G') 147 | li.append((1.0 / 3.0, newstate, rew, done)) 148 | else: 149 | newrow, newcol = inc(row, col, a) 150 | newstate = to_s(newrow, newcol) 151 | newletter = desc[newrow, newcol] 152 | done = bytes(newletter) in b'GH' 153 | # rew = float(newletter == b'G') 154 | if newletter == b'G': 155 | rew = 1.0 156 | elif newletter == b'H': 157 | rew = 0. 158 | else: 159 | rew = 0. 
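                            # non-slippery case: a single deterministic transition
                            # with probability 1.0; only reaching G yields reward 1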
160 | li.append((1.0, newstate, rew, done)) 161 | 162 | super(GridWorld, self).__init__(nS, nA, P, isd) 163 | 164 | def _reset(self): 165 | s = super(GridWorld, self)._reset() 166 | return to_one_hot(s, self.nS) 167 | 168 | def _step(self, a): 169 | s, r, d, p = super(GridWorld, self)._step(a) 170 | return to_one_hot(s, self.nS), r, d, p 171 | 172 | def print_obs(self, obs): 173 | import copy 174 | map = copy.deepcopy(self.desc).astype(str) 175 | _obs = int(np.where(obs == 1)[0][0]) 176 | map[_obs // 9, _obs % 9] = 'X' 177 | for row in map: 178 | print(row) 179 | 180 | def _render(self, mode='human', close=False): 181 | if close: 182 | return 183 | outfile = StringIO() if mode == 'ansi' else sys.stdout 184 | 185 | row, col = self.s // self.ncol, self.s % self.ncol 186 | desc = self.desc.tolist() 187 | desc = [[c.decode('utf-8') for c in line] for line in desc] 188 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 189 | if self.lastaction is not None: 190 | outfile.write(" ({})\n".format( 191 | ["Left", "Down", "Right", "Up"][self.lastaction])) 192 | else: 193 | outfile.write("\n") 194 | outfile.write("\n".join(''.join(line) for line in desc) + "\n") 195 | 196 | if mode != 'human': 197 | return outfile 198 | 199 | 200 | register( 201 | 'GridWorld-v0', 202 | entry_point='simpledqn.gridworld_env:GridWorld', 203 | timestep_limit=40, 204 | ) 205 | -------------------------------------------------------------------------------- /lab3/simpledqn/replay_buffer.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import random 19 | import pickle 20 | 21 | 22 | class ReplayBuffer(object): 23 | def __init__(self, max_size): 24 | """Simple replay buffer for storing sampled DQN (s, a, s', r) transitions as tuples. 25 | 26 | :param size: Maximum size of the replay buffer. 
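        Usage sketch (illustrative only, not from the original lab code), where
        obs_t, act, rew, obs_tp1, done describe a single sampled transition:

            buf = ReplayBuffer(max_size=10000)
            buf.add(obs_t, act, rew, obs_tp1, done)
            obs_b, act_b, rew_b, obs_tp1_b, done_b = buf.sample(batch_size=32)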
27 | """ 28 | self._buffer = [] 29 | self._max_size = max_size 30 | self._idx = 0 31 | 32 | def __len__(self): 33 | return len(self._buffer) 34 | 35 | def add(self, obs_t, act, rew, obs_tp1, done): 36 | """ 37 | Add a new sample to the replay buffer. 38 | :param obs_t: observation at time t 39 | :param act: action 40 | :param rew: reward 41 | :param obs_tp1: observation at time t+1 42 | :param done: termination signal (whether episode has finished or not) 43 | """ 44 | data = (obs_t, act, rew, obs_tp1, done) 45 | if self._idx >= len(self._buffer): 46 | self._buffer.append(data) 47 | else: 48 | self._buffer[self._idx] = data 49 | self._idx = (self._idx + 1) % self._max_size 50 | 51 | def _encode_sample(self, idxes): 52 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], [] 53 | for i in idxes: 54 | data = self._buffer[i] 55 | obs_t, action, reward, obs_tp1, done = data 56 | obses_t.append(np.array(obs_t, copy=False)) 57 | actions.append(np.array(action, copy=False)) 58 | rewards.append(reward) 59 | obses_tp1.append(np.array(obs_tp1, copy=False)) 60 | dones.append(done) 61 | return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones) 62 | 63 | def sample(self, batch_size): 64 | """Sample a batch of transition tuples. 65 | 66 | :param batch_size: Number of sampled transition tuples. 67 | :return: Tuple of transitions. 68 | """ 69 | idxes = [random.randint(0, len(self._buffer) - 1) 70 | for _ in range(batch_size)] 71 | return self._encode_sample(idxes) 72 | 73 | def dump(self, file_path=None): 74 | """Dump the replay buffer into a file. 75 | """ 76 | file = open(file_path, 'wb') 77 | pickle.dump(self._buffer, file, -1) 78 | file.close() 79 | 80 | def load(self, file_path=None): 81 | """Load the replay buffer from a file 82 | """ 83 | file = open(file_path, 'rb') 84 | self._buffer = pickle.load(file) 85 | file.close() 86 | -------------------------------------------------------------------------------- /lab3/simpledqn/replay_buffer_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/simpledqn/replay_buffer_warm_start.pkl -------------------------------------------------------------------------------- /lab3/simpledqn/simple_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import scipy.special 19 | import chainer 20 | 21 | 22 | # Compute gradient approximately using finite difference 23 | def numerical_grad(f, x, eps=1e-8): 24 | grad = np.zeros_like(x) 25 | for i in range(len(x)): 26 | xplus = np.array(x) 27 | xplus[i] += eps 28 | fplus = f(xplus) 29 | xminus = np.array(x) 30 | xminus[i] -= eps 31 | fminus = f(xminus) 32 | grad[i] = (fplus - fminus) / (2 * eps) 33 | return grad 34 | 35 | 36 | def gradient_check(f, g, x): 37 | # Test the implementation of g(x) = df/dx 38 | # Perform numerical differentiation and test it 39 | g_num = numerical_grad(f, x) 40 | g_test = g(x) 41 | try: 42 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 43 | print("Gradient check passed!") 44 | except AssertionError as e: 45 | print(e) 46 | print("Warning: Gradient check didn't pass!") 47 | 48 | 49 | def log_softmax(logits): 50 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 51 | 52 | 53 | def softmax(logits): 54 | x = logits 55 | x = x - np.max(x, axis=-1, keepdims=True) 56 | x = np.exp(x) 57 | return x / np.sum(x, axis=-1, keepdims=True) 58 | 59 | 60 | def weighted_sample(logits, rng=np.random): 61 | weights = softmax(logits) 62 | return min( 63 | int(np.sum(rng.uniform() > np.cumsum(weights))), 64 | len(weights) - 1 65 | ) 66 | 67 | 68 | def include_bias(x): 69 | # Add a constant term (1.0) to each entry in x 70 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 71 | 72 | 73 | _tested = set() 74 | 75 | nprs = np.random.RandomState 76 | 77 | 78 | def assert_allclose(a, b): 79 | if isinstance(a, (np.ndarray, float, int)): 80 | np.testing.assert_allclose(a, b) 81 | elif isinstance(a, (tuple, list)): 82 | assert isinstance(b, (tuple, list)) 83 | assert len(a) == len(b) 84 | for a_i, b_i in zip(a, b): 85 | assert_allclose(a_i, b_i) 86 | elif isinstance(a, chainer.Variable): 87 | assert isinstance(b, chainer.Variable) 88 | assert_allclose(a.data, b.data) 89 | else: 90 | raise NotImplementedError 91 | 92 | 93 | def test_once(fn, kwargs, desired_output=None): 94 | if fn.__name__ in _tested: 95 | return 96 | _tested.add(fn.__name__) 97 | 98 | if callable(kwargs): 99 | kwargs = kwargs() 100 | 101 | if callable(desired_output): 102 | desired_output = desired_output() 103 | 104 | if desired_output is None: 105 | print("Desired output for %s:" % (fn.__name__), repr(fn(**kwargs))) 106 | exit() 107 | else: 108 | try: 109 | output = fn(**kwargs) 110 | assert_allclose(desired_output, output) 111 | print("Test for %s passed!" % (fn.__name__)) 112 | except AssertionError as e: 113 | print(e) 114 | print("Warning: test for %s didn't pass!" 
% (fn.__name__)) 115 | -------------------------------------------------------------------------------- /lab3/simpledqn/weights_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/simpledqn/weights_warm_start.pkl -------------------------------------------------------------------------------- /lab3/simpledqn/wrappers.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import gym 18 | import numpy as np 19 | 20 | 21 | class NoopResetEnv(gym.Wrapper): 22 | def __init__(self, env=None, noop_max=30): 23 | """Sample initial states by taking random number of no-ops on reset. 24 | No-op is assumed to be action 0. 25 | """ 26 | super(NoopResetEnv, self).__init__(env) 27 | self.noop_max = noop_max 28 | self.override_num_noops = None 29 | assert env.unwrapped.get_action_meanings()[0] == 'NOOP' 30 | 31 | def _reset(self): 32 | """ Do no-op action for a number of steps in [1, noop_max].""" 33 | self.env.reset() 34 | if self.override_num_noops is not None: 35 | noops = self.override_num_noops 36 | else: 37 | noops = np.random.randint(1, self.noop_max + 1) 38 | assert noops > 0 39 | obs = None 40 | for _ in range(noops): 41 | obs, _, done, _ = self.env.step(0) 42 | if done: 43 | obs = self.env.reset() 44 | return obs 45 | 46 | 47 | class EpisodicLifeEnv(gym.Wrapper): 48 | def __init__(self, env=None): 49 | """Make end-of-life == end-of-episode, but only reset on true game over. 50 | Done by DeepMind for the DQN and co. since it helps value estimation. 
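        Illustrative wrapping order (an assumption, not prescribed by this file;
        the Atari env id is only an example):

            env = EpisodicLifeEnv(NoopResetEnv(gym.make('PongNoFrameskip-v4')))

        Lost lives then terminate training episodes, while the emulator is only
        fully reset once was_real_done indicates a real game over.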
51 | """ 52 | super(EpisodicLifeEnv, self).__init__(env) 53 | self.lives = 0 54 | self.was_real_done = True 55 | self.was_real_reset = False 56 | 57 | def _step(self, action): 58 | obs, reward, done, info = self.env.step(action) 59 | self.was_real_done = done 60 | # check current lives, make loss of life terminal, 61 | # then update lives to handle bonus lives 62 | lives = self.env.unwrapped.ale.lives() 63 | if lives < self.lives and lives > 0: 64 | # for Qbert somtimes we stay in lives == 0 condtion for a few frames 65 | # so its important to keep lives > 0, so that we only reset once 66 | # the environment advertises done. 67 | done = True 68 | self.lives = lives 69 | return obs, reward, done, info 70 | 71 | def _reset(self): 72 | """Reset only when lives are exhausted. 73 | This way all states are still reachable even though lives are episodic, 74 | and the learner need not know about any of this behind-the-scenes. 75 | """ 76 | if self.was_real_done: 77 | obs = self.env.reset() 78 | self.was_real_reset = True 79 | else: 80 | # no-op step to advance from terminal/lost life state 81 | obs, _, _, _ = self.env.step(0) 82 | self.was_real_reset = False 83 | self.lives = self.env.unwrapped.ale.lives() 84 | return obs 85 | -------------------------------------------------------------------------------- /lab3/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/viskit/__init__.py -------------------------------------------------------------------------------- /lab3/viskit/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | import csv 16 | import itertools 17 | import json 18 | import os 19 | 20 | import numpy as np 21 | 22 | 23 | # from sandbox.rocky.utils.py_utils import AttrDict 24 | 25 | class AttrDict(dict): 26 | def __init__(self, *args, **kwargs): 27 | super(AttrDict, self).__init__(*args, **kwargs) 28 | self.__dict__ = self 29 | 30 | 31 | def unique(l): 32 | return list(set(l)) 33 | 34 | 35 | def flatten(l): 36 | return [item for sublist in l for item in sublist] 37 | 38 | 39 | def load_progress(progress_json_path, verbose=True): 40 | if verbose: 41 | print("Reading %s" % progress_json_path) 42 | entries = dict() 43 | rows = [] 44 | with open(progress_json_path, 'r') as f: 45 | lines = f.read().split('\n') 46 | for line in lines: 47 | if len(line) > 0: 48 | row = json.loads(line) 49 | rows.append(row) 50 | all_keys = set(k for row in rows for k in row.keys()) 51 | for k in all_keys: 52 | if k not in entries: 53 | entries[k] = [] 54 | for row in rows: 55 | if k in row: 56 | v = row[k] 57 | try: 58 | entries[k].append(float(v)) 59 | except: 60 | entries[k].append(np.nan) 61 | else: 62 | entries[k].append(np.nan) 63 | 64 | # entries[key] = [row.get(key, np.nan) for row in rows] 65 | # added_keys = set() 66 | # for k, v in row.items(): 67 | # if k not in entries: 68 | # entries[k] = [] 69 | # try: 70 | # entries[k].append(float(v)) 71 | # except: 72 | # entries[k].append(0.) 73 | # added_keys.add(k) 74 | # for k in entries.keys(): 75 | # if k not in added_keys: 76 | # entries[k].append(np.nan) 77 | entries = dict([(k, np.array(v)) for k, v in entries.items()]) 78 | return entries 79 | 80 | 81 | def flatten_dict(d): 82 | flat_params = dict() 83 | for k, v in d.items(): 84 | if isinstance(v, dict): 85 | v = flatten_dict(v) 86 | for subk, subv in flatten_dict(v).items(): 87 | flat_params[k + "." 
+ subk] = subv 88 | else: 89 | flat_params[k] = v 90 | return flat_params 91 | 92 | 93 | def load_params(params_json_path): 94 | with open(params_json_path, 'r') as f: 95 | data = json.loads(f.read()) 96 | if "args_data" in data: 97 | del data["args_data"] 98 | if "exp_name" not in data: 99 | data["exp_name"] = params_json_path.split("/")[-2] 100 | return data 101 | 102 | 103 | def lookup(d, keys): 104 | if not isinstance(keys, list): 105 | keys = keys.split(".") 106 | for k in keys: 107 | if hasattr(d, "__getitem__"): 108 | if k in d: 109 | d = d[k] 110 | else: 111 | return None 112 | else: 113 | return None 114 | return d 115 | 116 | 117 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True): 118 | if isinstance(exp_folder_paths, str): 119 | exp_folder_paths = [exp_folder_paths] 120 | exps = [] 121 | for exp_folder_path in exp_folder_paths: 122 | exps += [x[0] for x in os.walk(exp_folder_path)] 123 | if verbose: 124 | print("finished walking exp folders") 125 | exps_data = [] 126 | for exp in exps: 127 | try: 128 | exp_path = exp 129 | variant_json_path = os.path.join(exp_path, "variant.json") 130 | progress_json_path = os.path.join(exp_path, "progress.json") 131 | progress = load_progress(progress_json_path, verbose=verbose) 132 | try: 133 | params = load_params(variant_json_path) 134 | except IOError: 135 | params = dict(exp_name="experiment") 136 | exps_data.append(AttrDict( 137 | progress=progress, params=params, flat_params=flatten_dict(params))) 138 | except IOError as e: 139 | if verbose: 140 | print(e) 141 | 142 | # a dictionary of all keys and types of values 143 | all_keys = dict() 144 | for data in exps_data: 145 | for key in data.flat_params.keys(): 146 | if key not in all_keys: 147 | all_keys[key] = type(data.flat_params[key]) 148 | 149 | # if any data does not have some key, specify the value of it 150 | if not ignore_missing_keys: 151 | default_values = dict() 152 | for data in exps_data: 153 | for key in sorted(all_keys.keys()): 154 | if key not in data.flat_params: 155 | if key not in default_values: 156 | default = None 157 | default_values[key] = default 158 | data.flat_params[key] = default_values[key] 159 | 160 | return exps_data 161 | 162 | 163 | def smart_repr(x): 164 | if isinstance(x, tuple): 165 | if len(x) == 0: 166 | return "tuple()" 167 | elif len(x) == 1: 168 | return "(%s,)" % smart_repr(x[0]) 169 | else: 170 | return "(" + ",".join(map(smart_repr, x)) + ")" 171 | else: 172 | if hasattr(x, "__call__"): 173 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." 
+ x.__name__) 174 | else: 175 | return repr(x) 176 | 177 | 178 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 179 | try: 180 | stringified_pairs = sorted( 181 | map( 182 | eval, 183 | unique( 184 | flatten( 185 | [ 186 | list( 187 | map( 188 | smart_repr, 189 | list(d.flat_params.items()) 190 | ) 191 | ) 192 | for d in exps_data 193 | ] 194 | ) 195 | ) 196 | ), 197 | key=lambda x: ( 198 | tuple("" if it is None else str(it) for it in x), 199 | ) 200 | ) 201 | except Exception as e: 202 | print(e) 203 | import ipdb 204 | ipdb.set_trace() 205 | proposals = [(k, [x[1] for x in v]) 206 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 207 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 208 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 209 | return filtered 210 | 211 | 212 | class Selector(object): 213 | def __init__(self, exps_data, filters=None, custom_filters=None): 214 | self._exps_data = exps_data 215 | if filters is None: 216 | self._filters = tuple() 217 | else: 218 | self._filters = tuple(filters) 219 | if custom_filters is None: 220 | self._custom_filters = [] 221 | else: 222 | self._custom_filters = custom_filters 223 | 224 | def where(self, k, v): 225 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 226 | 227 | def custom_filter(self, filter): 228 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 229 | 230 | def _check_exp(self, exp): 231 | # or exp.flat_params.get(k, None) is None 232 | return all( 233 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 234 | k not in exp.flat_params)) for k, v in self._filters) 235 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 236 | 237 | def extract(self): 238 | return list(filter(self._check_exp, self._exps_data)) 239 | 240 | def iextract(self): 241 | return filter(self._check_exp, self._exps_data) 242 | 243 | 244 | # Taken from plot.ly 245 | color_defaults = [ 246 | '#1f77b4', # muted blue 247 | '#ff7f0e', # safety orange 248 | '#2ca02c', # cooked asparagus green 249 | '#d62728', # brick red 250 | '#9467bd', # muted purple 251 | '#8c564b', # chestnut brown 252 | '#e377c2', # raspberry yogurt pink 253 | '#7f7f7f', # middle gray 254 | '#bcbd22', # curry yellow-green 255 | '#17becf' # blue-teal 256 | ] 257 | 258 | 259 | def hex_to_rgb(hex, opacity=1.0): 260 | if hex[0] == '#': 261 | hex = hex[1:] 262 | assert (len(hex) == 6) 263 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 264 | -------------------------------------------------------------------------------- /lab3/viskit/static/css/dropdowns-enhancement.css: -------------------------------------------------------------------------------- 1 | .dropdown-menu > li > label { 2 | display: block; 3 | padding: 3px 20px; 4 | clear: both; 5 | font-weight: normal; 6 | line-height: 1.42857143; 7 | color: #333333; 8 | white-space: nowrap; 9 | } 10 | .dropdown-menu > li > label:hover, 11 | .dropdown-menu > li > label:focus { 12 | text-decoration: none; 13 | color: #262626; 14 | background-color: #f5f5f5; 15 | } 16 | .dropdown-menu > li > input:checked ~ label, 17 | .dropdown-menu > li > input:checked ~ label:hover, 18 | .dropdown-menu > li > input:checked ~ label:focus, 19 | .dropdown-menu > .active > label, 20 | .dropdown-menu > .active > label:hover, 21 | .dropdown-menu > .active > label:focus { 22 | color: #ffffff; 23 | text-decoration: none; 
24 | outline: 0; 25 | background-color: #428bca; 26 | } 27 | .dropdown-menu > li > input[disabled] ~ label, 28 | .dropdown-menu > li > input[disabled] ~ label:hover, 29 | .dropdown-menu > li > input[disabled] ~ label:focus, 30 | .dropdown-menu > .disabled > label, 31 | .dropdown-menu > .disabled > label:hover, 32 | .dropdown-menu > .disabled > label:focus { 33 | color: #999999; 34 | } 35 | .dropdown-menu > li > input[disabled] ~ label:hover, 36 | .dropdown-menu > li > input[disabled] ~ label:focus, 37 | .dropdown-menu > .disabled > label:hover, 38 | .dropdown-menu > .disabled > label:focus { 39 | text-decoration: none; 40 | background-color: transparent; 41 | background-image: none; 42 | filter: progid:DXImageTransform.Microsoft.gradient(enabled = false); 43 | cursor: not-allowed; 44 | } 45 | .dropdown-menu > li > label { 46 | margin-bottom: 0; 47 | cursor: pointer; 48 | } 49 | .dropdown-menu > li > input[type="radio"], 50 | .dropdown-menu > li > input[type="checkbox"] { 51 | display: none; 52 | position: absolute; 53 | top: -9999em; 54 | left: -9999em; 55 | } 56 | .dropdown-menu > li > label:focus, 57 | .dropdown-menu > li > input:focus ~ label { 58 | outline: thin dotted; 59 | outline: 5px auto -webkit-focus-ring-color; 60 | outline-offset: -2px; 61 | } 62 | .dropdown-menu.pull-right { 63 | right: 0; 64 | left: auto; 65 | } 66 | .dropdown-menu.pull-top { 67 | bottom: 100%; 68 | top: auto; 69 | margin: 0 0 2px; 70 | -webkit-box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 71 | box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 72 | } 73 | .dropdown-menu.pull-center { 74 | right: 50%; 75 | left: auto; 76 | } 77 | .dropdown-menu.pull-middle { 78 | right: 100%; 79 | margin: 0 2px 0 0; 80 | box-shadow: -5px 0 10px rgba(0, 0, 0, 0.2); 81 | left: auto; 82 | } 83 | .dropdown-menu.pull-middle.pull-right { 84 | right: auto; 85 | left: 100%; 86 | margin: 0 0 0 2px; 87 | box-shadow: 5px 0 10px rgba(0, 0, 0, 0.2); 88 | } 89 | .dropdown-menu.pull-middle.pull-center { 90 | right: 50%; 91 | margin: 0; 92 | box-shadow: 0 0 10px rgba(0, 0, 0, 0.2); 93 | } 94 | .dropdown-menu.bullet { 95 | margin-top: 8px; 96 | } 97 | .dropdown-menu.bullet:before { 98 | width: 0; 99 | height: 0; 100 | content: ''; 101 | display: inline-block; 102 | position: absolute; 103 | border-color: transparent; 104 | border-style: solid; 105 | -webkit-transform: rotate(360deg); 106 | border-width: 0 7px 7px; 107 | border-bottom-color: #cccccc; 108 | border-bottom-color: rgba(0, 0, 0, 0.15); 109 | top: -7px; 110 | left: 9px; 111 | } 112 | .dropdown-menu.bullet:after { 113 | width: 0; 114 | height: 0; 115 | content: ''; 116 | display: inline-block; 117 | position: absolute; 118 | border-color: transparent; 119 | border-style: solid; 120 | -webkit-transform: rotate(360deg); 121 | border-width: 0 6px 6px; 122 | border-bottom-color: #ffffff; 123 | top: -6px; 124 | left: 10px; 125 | } 126 | .dropdown-menu.bullet.pull-right:before { 127 | left: auto; 128 | right: 9px; 129 | } 130 | .dropdown-menu.bullet.pull-right:after { 131 | left: auto; 132 | right: 10px; 133 | } 134 | .dropdown-menu.bullet.pull-top { 135 | margin-top: 0; 136 | margin-bottom: 8px; 137 | } 138 | .dropdown-menu.bullet.pull-top:before { 139 | top: auto; 140 | bottom: -7px; 141 | border-bottom-width: 0; 142 | border-top-width: 7px; 143 | border-top-color: #cccccc; 144 | border-top-color: rgba(0, 0, 0, 0.15); 145 | } 146 | .dropdown-menu.bullet.pull-top:after { 147 | top: auto; 148 | bottom: -6px; 149 | border-bottom: none; 150 | border-top-width: 6px; 151 | border-top-color: 
#ffffff; 152 | } 153 | .dropdown-menu.bullet.pull-center:before { 154 | left: auto; 155 | right: 50%; 156 | margin-right: -7px; 157 | } 158 | .dropdown-menu.bullet.pull-center:after { 159 | left: auto; 160 | right: 50%; 161 | margin-right: -6px; 162 | } 163 | .dropdown-menu.bullet.pull-middle { 164 | margin-right: 8px; 165 | } 166 | .dropdown-menu.bullet.pull-middle:before { 167 | top: 50%; 168 | left: 100%; 169 | right: auto; 170 | margin-top: -7px; 171 | border-right-width: 0; 172 | border-bottom-color: transparent; 173 | border-top-width: 7px; 174 | border-left-color: #cccccc; 175 | border-left-color: rgba(0, 0, 0, 0.15); 176 | } 177 | .dropdown-menu.bullet.pull-middle:after { 178 | top: 50%; 179 | left: 100%; 180 | right: auto; 181 | margin-top: -6px; 182 | border-right-width: 0; 183 | border-bottom-color: transparent; 184 | border-top-width: 6px; 185 | border-left-color: #ffffff; 186 | } 187 | .dropdown-menu.bullet.pull-middle.pull-right { 188 | margin-right: 0; 189 | margin-left: 8px; 190 | } 191 | .dropdown-menu.bullet.pull-middle.pull-right:before { 192 | left: -7px; 193 | border-left-width: 0; 194 | border-right-width: 7px; 195 | border-right-color: #cccccc; 196 | border-right-color: rgba(0, 0, 0, 0.15); 197 | } 198 | .dropdown-menu.bullet.pull-middle.pull-right:after { 199 | left: -6px; 200 | border-left-width: 0; 201 | border-right-width: 6px; 202 | border-right-color: #ffffff; 203 | } 204 | .dropdown-menu.bullet.pull-middle.pull-center { 205 | margin-left: 0; 206 | margin-right: 0; 207 | } 208 | .dropdown-menu.bullet.pull-middle.pull-center:before { 209 | border: none; 210 | display: none; 211 | } 212 | .dropdown-menu.bullet.pull-middle.pull-center:after { 213 | border: none; 214 | display: none; 215 | } 216 | .dropdown-submenu { 217 | position: relative; 218 | } 219 | .dropdown-submenu > .dropdown-menu { 220 | top: 0; 221 | left: 100%; 222 | margin-top: -6px; 223 | margin-left: -1px; 224 | border-top-left-radius: 0; 225 | } 226 | .dropdown-submenu > a:before { 227 | display: block; 228 | float: right; 229 | width: 0; 230 | height: 0; 231 | content: ""; 232 | margin-top: 6px; 233 | margin-right: -8px; 234 | border-width: 4px 0 4px 4px; 235 | border-style: solid; 236 | border-left-style: dashed; 237 | border-top-color: transparent; 238 | border-bottom-color: transparent; 239 | } 240 | @media (max-width: 767px) { 241 | .navbar-nav .dropdown-submenu > a:before { 242 | margin-top: 8px; 243 | border-color: inherit; 244 | border-style: solid; 245 | border-width: 4px 4px 0; 246 | border-left-color: transparent; 247 | border-right-color: transparent; 248 | } 249 | .navbar-nav .dropdown-submenu > a { 250 | padding-left: 40px; 251 | } 252 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > a, 253 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > label { 254 | padding-left: 35px; 255 | } 256 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > a, 257 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > label { 258 | padding-left: 45px; 259 | } 260 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 261 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 262 | padding-left: 55px; 263 | } 264 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu 
> li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 265 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 266 | padding-left: 65px; 267 | } 268 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 269 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 270 | padding-left: 75px; 271 | } 272 | } 273 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 274 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 275 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 276 | background-color: #e7e7e7; 277 | color: #555555; 278 | } 279 | @media (max-width: 767px) { 280 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 281 | border-top-color: #555555; 282 | } 283 | } 284 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 285 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 286 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 287 | background-color: #080808; 288 | color: #ffffff; 289 | } 290 | @media (max-width: 767px) { 291 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 292 | border-top-color: #ffffff; 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /lab3/viskit/static/js/dropdowns-enhancement.js: -------------------------------------------------------------------------------- 1 | /* ======================================================================== 2 | * Bootstrap Dropdowns Enhancement: dropdowns-enhancement.js v3.1.1 (Beta 1) 3 | * http://behigh.github.io/bootstrap_dropdowns_enhancement/ 4 | * ======================================================================== 5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) 6 | * ======================================================================== */ 7 | 8 | (function($) { 9 | "use strict"; 10 | 11 | var toggle = '[data-toggle="dropdown"]', 12 | disabled = '.disabled, :disabled', 13 | backdrop = '.dropdown-backdrop', 14 | menuClass = 'dropdown-menu', 15 | subMenuClass = 'dropdown-submenu', 16 | namespace = '.bs.dropdown.data-api', 17 | eventNamespace = '.bs.dropdown', 18 | openClass = 'open', 19 | touchSupport = 'ontouchstart' in document.documentElement, 20 | opened; 21 | 22 | 23 | function Dropdown(element) { 24 | $(element).on('click' + eventNamespace, this.toggle) 25 | } 26 | 27 | var proto = Dropdown.prototype; 28 | 29 | proto.toggle = function(event) { 30 | var $element = $(this); 31 | 32 | if ($element.is(disabled)) return; 33 | 34 | var $parent = getParent($element); 35 | var isActive = $parent.hasClass(openClass); 36 | var isSubMenu = $parent.hasClass(subMenuClass); 37 | var menuTree = isSubMenu ? 
getSubMenuParents($parent) : null; 38 | 39 | closeOpened(event, menuTree); 40 | 41 | if (!isActive) { 42 | if (!menuTree) 43 | menuTree = [$parent]; 44 | 45 | if (touchSupport && !$parent.closest('.navbar-nav').length && !menuTree[0].find(backdrop).length) { 46 | // if mobile we use a backdrop because click events don't delegate 47 | $('
').appendTo(menuTree[0]).on('click', closeOpened) 48 | } 49 | 50 | for (var i = 0, s = menuTree.length; i < s; i++) { 51 | if (!menuTree[i].hasClass(openClass)) { 52 | menuTree[i].addClass(openClass); 53 | positioning(menuTree[i].children('.' + menuClass), menuTree[i]); 54 | } 55 | } 56 | opened = menuTree[0]; 57 | } 58 | 59 | return false; 60 | }; 61 | 62 | proto.keydown = function (e) { 63 | if (!/(38|40|27)/.test(e.keyCode)) return; 64 | 65 | var $this = $(this); 66 | 67 | e.preventDefault(); 68 | e.stopPropagation(); 69 | 70 | if ($this.is('.disabled, :disabled')) return; 71 | 72 | var $parent = getParent($this); 73 | var isActive = $parent.hasClass('open'); 74 | 75 | if (!isActive || (isActive && e.keyCode == 27)) { 76 | if (e.which == 27) $parent.find(toggle).trigger('focus'); 77 | return $this.trigger('click') 78 | } 79 | 80 | var desc = ' li:not(.divider):visible a'; 81 | var desc1 = 'li:not(.divider):visible > input:not(disabled) ~ label'; 82 | var $items = $parent.find(desc1 + ', ' + '[role="menu"]' + desc + ', [role="listbox"]' + desc); 83 | 84 | if (!$items.length) return; 85 | 86 | var index = $items.index($items.filter(':focus')); 87 | 88 | if (e.keyCode == 38 && index > 0) index--; // up 89 | if (e.keyCode == 40 && index < $items.length - 1) index++; // down 90 | if (!~index) index = 0; 91 | 92 | $items.eq(index).trigger('focus') 93 | }; 94 | 95 | proto.change = function (e) { 96 | 97 | var 98 | $parent, 99 | $menu, 100 | $toggle, 101 | selector, 102 | text = '', 103 | $items; 104 | 105 | $menu = $(this).closest('.' + menuClass); 106 | 107 | $toggle = $menu.parent().find('[data-label-placement]'); 108 | 109 | if (!$toggle || !$toggle.length) { 110 | $toggle = $menu.parent().find(toggle); 111 | } 112 | 113 | if (!$toggle || !$toggle.length || $toggle.data('placeholder') === false) 114 | return; // do nothing, no control 115 | 116 | ($toggle.data('placeholder') == undefined && $toggle.data('placeholder', $.trim($toggle.text()))); 117 | text = $.data($toggle[0], 'placeholder'); 118 | 119 | $items = $menu.find('li > input:checked'); 120 | 121 | if ($items.length) { 122 | text = []; 123 | $items.each(function () { 124 | var str = $(this).parent().find('label').eq(0), 125 | label = str.find('.data-label'); 126 | 127 | if (label.length) { 128 | var p = $('
<p></p>
'); 129 | p.append(label.clone()); 130 | str = p.html(); 131 | } 132 | else { 133 | str = str.html(); 134 | } 135 | 136 | 137 | str && text.push($.trim(str)); 138 | }); 139 | 140 | text = text.length < 4 ? text.join(', ') : text.length + ' selected'; 141 | } 142 | 143 | var caret = $toggle.find('.caret'); 144 | 145 | $toggle.html(text || ' '); 146 | if (caret.length) 147 | $toggle.append(' ') && caret.appendTo($toggle); 148 | 149 | }; 150 | 151 | function positioning($menu, $control) { 152 | if ($menu.hasClass('pull-center')) { 153 | $menu.css('margin-right', $menu.outerWidth() / -2); 154 | } 155 | 156 | if ($menu.hasClass('pull-middle')) { 157 | $menu.css('margin-top', ($menu.outerHeight() / -2) - ($control.outerHeight() / 2)); 158 | } 159 | } 160 | 161 | function closeOpened(event, menuTree) { 162 | if (opened) { 163 | 164 | if (!menuTree) { 165 | menuTree = [opened]; 166 | } 167 | 168 | var parent; 169 | 170 | if (opened[0] !== menuTree[0][0]) { 171 | parent = opened; 172 | } else { 173 | parent = menuTree[menuTree.length - 1]; 174 | if (parent.parent().hasClass(menuClass)) { 175 | parent = parent.parent(); 176 | } 177 | } 178 | 179 | parent.find('.' + openClass).removeClass(openClass); 180 | 181 | if (parent.hasClass(openClass)) 182 | parent.removeClass(openClass); 183 | 184 | if (parent === opened) { 185 | opened = null; 186 | $(backdrop).remove(); 187 | } 188 | } 189 | } 190 | 191 | function getSubMenuParents($submenu) { 192 | var result = [$submenu]; 193 | var $parent; 194 | while (!$parent || $parent.hasClass(subMenuClass)) { 195 | $parent = ($parent || $submenu).parent(); 196 | if ($parent.hasClass(menuClass)) { 197 | $parent = $parent.parent(); 198 | } 199 | if ($parent.children(toggle)) { 200 | result.unshift($parent); 201 | } 202 | } 203 | return result; 204 | } 205 | 206 | function getParent($this) { 207 | var selector = $this.attr('data-target'); 208 | 209 | if (!selector) { 210 | selector = $this.attr('href'); 211 | selector = selector && /#[A-Za-z]/.test(selector) && selector.replace(/.*(?=#[^\s]*$)/, ''); //strip for ie7 212 | } 213 | 214 | var $parent = selector && $(selector); 215 | 216 | return $parent && $parent.length ? $parent : $this.parent() 217 | } 218 | 219 | // DROPDOWN PLUGIN DEFINITION 220 | // ========================== 221 | 222 | var old = $.fn.dropdown; 223 | 224 | $.fn.dropdown = function (option) { 225 | return this.each(function () { 226 | var $this = $(this); 227 | var data = $this.data('bs.dropdown'); 228 | 229 | if (!data) $this.data('bs.dropdown', (data = new Dropdown(this))); 230 | if (typeof option == 'string') data[option].call($this); 231 | }) 232 | }; 233 | 234 | $.fn.dropdown.Constructor = Dropdown; 235 | 236 | $.fn.dropdown.clearMenus = function(e) { 237 | $(backdrop).remove(); 238 | $('.' 
+ openClass + ' ' + toggle).each(function () { 239 | var $parent = getParent($(this)); 240 | var relatedTarget = { relatedTarget: this }; 241 | if (!$parent.hasClass('open')) return; 242 | $parent.trigger(e = $.Event('hide' + eventNamespace, relatedTarget)); 243 | if (e.isDefaultPrevented()) return; 244 | $parent.removeClass('open').trigger('hidden' + eventNamespace, relatedTarget); 245 | }); 246 | return this; 247 | }; 248 | 249 | 250 | // DROPDOWN NO CONFLICT 251 | // ==================== 252 | 253 | $.fn.dropdown.noConflict = function () { 254 | $.fn.dropdown = old; 255 | return this 256 | }; 257 | 258 | 259 | $(document).off(namespace) 260 | .on('click' + namespace, closeOpened) 261 | .on('click' + namespace, toggle, proto.toggle) 262 | .on('click' + namespace, '.dropdown-menu > li > input[type="checkbox"] ~ label, .dropdown-menu > li > input[type="checkbox"], .dropdown-menu.noclose > li', function (e) { 263 | e.stopPropagation() 264 | }) 265 | .on('change' + namespace, '.dropdown-menu > li > input[type="checkbox"], .dropdown-menu > li > input[type="radio"]', proto.change) 266 | .on('keydown' + namespace, toggle + ', [role="menu"], [role="listbox"]', proto.keydown) 267 | }(jQuery)); -------------------------------------------------------------------------------- /lab4/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Deep RL Bootcamp Organizers. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lab4/alg_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | from utils import * 16 | 17 | 18 | # ============================== 19 | # Shared utilities 20 | # ============================== 21 | 22 | def compute_cumulative_returns(rewards, baselines, discount): 23 | # This method builds up the cumulative sum of discounted rewards for each time step: 24 | # R[t] = sum_{t'>=t} γ^(t'-t)*r_t' 25 | # Note that we use γ^(t'-t) instead of γ^t'. This gives us a biased gradient but lower variance 26 | returns = [] 27 | # Use the last baseline prediction to back up 28 | cum_return = baselines[-1] 29 | for reward in rewards[::-1]: 30 | cum_return = cum_return * discount + reward 31 | returns.append(cum_return) 32 | return returns[::-1] 33 | 34 | 35 | def compute_advantages(rewards, baselines, discount, gae_lambda): 36 | # Given returns R_t and baselines b(s_t), compute (generalized) advantage estimate A_t 37 | deltas = rewards + discount * baselines[1:] - baselines[:-1] 38 | advs = [] 39 | cum_adv = 0 40 | multiplier = discount * gae_lambda 41 | for delta in deltas[::-1]: 42 | cum_adv = cum_adv * multiplier + delta 43 | advs.append(cum_adv) 44 | return advs[::-1] 45 | 46 | 47 | def compute_pg_vars(trajs, policy, baseline, discount, gae_lambda): 48 | """ 49 | Compute chainer variables needed for various policy gradient algorithms 50 | """ 51 | for traj in trajs: 52 | # Include the last observation here, in case the trajectory is not finished 53 | baselines = baseline.predict(np.concatenate( 54 | [traj["observations"], [traj["last_observation"]]])) 55 | if traj['finished']: 56 | # If already finished, the future cumulative rewards starting from the final state is 0 57 | baselines[-1] = 0. 58 | # This is useful when fitting baselines. It uses the baseline prediction of the last state value to perform 59 | # Bellman backup if the trajectory is not finished. 
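        # Illustrative numbers (not from the original code): with discount=0.99,
        # gae_lambda=1.0, rewards [1, 1] and baselines [0.5, 0.4, 2.0] (the trailing
        # 2.0 is the bootstrap value of an unfinished trajectory),
        # compute_cumulative_returns gives [1 + 0.99*(1 + 0.99*2.0), 1 + 0.99*2.0]
        # = [3.95, 2.98], and since gae_lambda=1.0 the advantages telescope to
        # returns - baselines[:-1] = [3.45, 2.58].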
60 | traj['returns'] = compute_cumulative_returns( 61 | traj['rewards'], baselines, discount) 62 | traj['advantages'] = compute_advantages( 63 | traj['rewards'], baselines, discount, gae_lambda) 64 | traj['baselines'] = baselines[:-1] 65 | 66 | # First, we compute a flattened list of observations, actions, and advantages 67 | all_obs = np.concatenate([traj['observations'] for traj in trajs], axis=0) 68 | all_acts = np.concatenate([traj['actions'] for traj in trajs], axis=0) 69 | all_advs = np.concatenate([traj['advantages'] for traj in trajs], axis=0) 70 | all_dists = { 71 | k: np.concatenate([traj['distributions'][k] for traj in trajs], axis=0) 72 | for k in trajs[0]['distributions'].keys() 73 | } 74 | 75 | # Normalizing the advantage values can make the algorithm more robust to reward scaling 76 | all_advs = (all_advs - np.mean(all_advs)) / (np.std(all_advs) + 1e-8) 77 | 78 | # Form chainer variables 79 | all_obs = Variable(all_obs) 80 | all_acts = Variable(all_acts) 81 | all_advs = Variable(all_advs.astype(np.float32, copy=False)) 82 | all_dists = policy.distribution.from_dict( 83 | {k: Variable(v) for k, v in all_dists.items()}) 84 | 85 | return all_obs, all_acts, all_advs, all_dists 86 | 87 | 88 | # ============================== 89 | # Helper methods for logging 90 | # ============================== 91 | 92 | def log_reward_statistics(env): 93 | # keep unwrapping until we get the monitor 94 | while not isinstance(env, gym.wrappers.Monitor): # and not isinstance() 95 | if not isinstance(env, gym.Wrapper): 96 | assert False 97 | env = env.env 98 | # env.unwrapped 99 | assert isinstance(env, gym.wrappers.Monitor) 100 | all_stats = None 101 | for _ in range(10): 102 | try: 103 | all_stats = gym.wrappers.monitoring.load_results(env.directory) 104 | except FileNotFoundError: 105 | time.sleep(1) 106 | continue 107 | if all_stats is not None: 108 | episode_rewards = all_stats['episode_rewards'] 109 | episode_lengths = all_stats['episode_lengths'] 110 | 111 | recent_episode_rewards = episode_rewards[-100:] 112 | recent_episode_lengths = episode_lengths[-100:] 113 | 114 | if len(recent_episode_rewards) > 0: 115 | logger.logkv('AverageReturn', np.mean(recent_episode_rewards)) 116 | logger.logkv('MinReturn', np.min(recent_episode_rewards)) 117 | logger.logkv('MaxReturn', np.max(recent_episode_rewards)) 118 | logger.logkv('StdReturn', np.std(recent_episode_rewards)) 119 | logger.logkv('AverageEpisodeLength', 120 | np.mean(recent_episode_lengths)) 121 | logger.logkv('MinEpisodeLength', np.min(recent_episode_lengths)) 122 | logger.logkv('MaxEpisodeLength', np.max(recent_episode_lengths)) 123 | logger.logkv('StdEpisodeLength', np.std(recent_episode_lengths)) 124 | 125 | logger.logkv('TotalNEpisodes', len(episode_rewards)) 126 | logger.logkv('TotalNSamples', np.sum(episode_lengths)) 127 | 128 | 129 | def log_baseline_statistics(trajs): 130 | # Specifically, compute the explained variance, defined as 131 | baselines = np.concatenate([traj['baselines'] for traj in trajs]) 132 | returns = np.concatenate([traj['returns'] for traj in trajs]) 133 | logger.logkv('ExplainedVariance', 134 | explained_variance_1d(baselines, returns)) 135 | 136 | 137 | def log_action_distribution_statistics(dists): 138 | with chainer.no_backprop_mode(): 139 | entropy = F.mean(dists.entropy()).data 140 | logger.logkv('Entropy', entropy) 141 | logger.logkv('Perplexity', np.exp(entropy)) 142 | if isinstance(dists, Gaussian): 143 | logger.logkv('AveragePolicyStd', F.mean( 144 | F.exp(dists.log_stds)).data) 145 | for idx in 
range(dists.log_stds.shape[-1]): 146 | logger.logkv('AveragePolicyStd[{}]'.format( 147 | idx), F.mean(F.exp(dists.log_stds[..., idx])).data) 148 | elif isinstance(dists, Categorical): 149 | probs = F.mean(F.softmax(dists.logits), axis=0).data 150 | for idx in range(len(probs)): 151 | logger.logkv('AveragePolicyProb[{}]'.format(idx), probs[idx]) 152 | -------------------------------------------------------------------------------- /lab4/algs.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | from pg import pg 16 | from trpo import trpo 17 | from a2c import a2c 18 | -------------------------------------------------------------------------------- /lab4/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 
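            # without xhost we cannot grant the container access to the host X server, so give up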
17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /lab4/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /lab4/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /lab4/experiments/run_a2c_breakout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | from algs import a2c 18 | from env_makers import EnvMaker 19 | from models import CategoricalCNNPolicy 20 | from utils import SnapshotSaver 21 | import numpy as np 22 | import os 23 | import logger 24 | 25 | log_dir = "data/local/a2c-breakout" 26 | 27 | np.random.seed(42) 28 | 29 | # Clean up existing logs 30 | os.system("rm -rf {}".format(log_dir)) 31 | 32 | with logger.session(log_dir): 33 | env_maker = EnvMaker('BreakoutNoFrameskip-v4') 34 | env = env_maker.make() 35 | policy = CategoricalCNNPolicy( 36 | env.observation_space, env.action_space, env.spec) 37 | vf = policy.create_vf() 38 | a2c( 39 | env=env, 40 | env_maker=env_maker, 41 | n_envs=16, 42 | policy=policy, 43 | vf=vf, 44 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 45 | ) 46 | -------------------------------------------------------------------------------- /lab4/experiments/run_a2c_pong.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
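Overview: trains an A2C agent with a categorical CNN policy on PongNoFrameskip-v4 using 16 parallel environments, writing logs and periodic snapshots to data/local/a2c-pong.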
13 | 14 | """ 15 | 16 | 17 | from algs import a2c 18 | from env_makers import EnvMaker 19 | from models import CategoricalCNNPolicy 20 | from utils import SnapshotSaver 21 | import numpy as np 22 | import os 23 | import logger 24 | 25 | log_dir = "data/local/a2c-pong" 26 | 27 | np.random.seed(42) 28 | 29 | # Clean up existing logs 30 | os.system("rm -rf {}".format(log_dir)) 31 | 32 | with logger.session(log_dir): 33 | env_maker = EnvMaker('PongNoFrameskip-v4') 34 | env = env_maker.make() 35 | policy = CategoricalCNNPolicy( 36 | env.observation_space, env.action_space, env.spec) 37 | vf = policy.create_vf() 38 | a2c( 39 | env=env, 40 | env_maker=env_maker, 41 | n_envs=16, 42 | policy=policy, 43 | vf=vf, 44 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 45 | ) 46 | -------------------------------------------------------------------------------- /lab4/experiments/run_a2c_pong_warm_start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | from utils import SnapshotSaver 18 | import numpy as np 19 | import os 20 | import logger 21 | import pickle 22 | 23 | log_dir = "data/local/a2c-pong-warm-start" 24 | 25 | np.random.seed(42) 26 | 27 | # Clean up existing logs 28 | os.system("rm -rf {}".format(log_dir)) 29 | 30 | with logger.session(log_dir): 31 | with open("pong_warm_start.pkl", "rb") as f: 32 | state = pickle.load(f) 33 | saver = SnapshotSaver(log_dir, interval=10) 34 | alg_state = state['alg_state'] 35 | env = alg_state['env_maker'].make() 36 | alg = state['alg'] 37 | alg(env=env, snapshot_saver=saver, **alg_state) 38 | -------------------------------------------------------------------------------- /lab4/experiments/run_pg_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import chainer 18 | 19 | from algs import pg 20 | from env_makers import EnvMaker 21 | from models import CategoricalMLPPolicy, MLPBaseline 22 | from utils import SnapshotSaver 23 | import numpy as np 24 | import os 25 | import logger 26 | 27 | log_dir = "data/local/pg-cartpole" 28 | 29 | np.random.seed(42) 30 | 31 | # Clean up existing logs 32 | os.system("rm -rf {}".format(log_dir)) 33 | 34 | with logger.session(log_dir): 35 | env_maker = EnvMaker('CartPole-v0') 36 | env = env_maker.make() 37 | policy = CategoricalMLPPolicy(observation_space=env.observation_space, action_space=env.action_space, 38 | env_spec=env.spec) 39 | baseline = MLPBaseline(observation_space=env.observation_space, action_space=env.action_space, 40 | env_spec=env.spec) 41 | pg( 42 | env=env, 43 | env_maker=env_maker, 44 | n_envs=16, 45 | policy=policy, 46 | baseline=baseline, 47 | batch_size=2000, 48 | n_iters=100, 49 | snapshot_saver=SnapshotSaver(log_dir), 50 | optimizer=chainer.optimizers.Adam(1e-2) 51 | ) 52 | -------------------------------------------------------------------------------- /lab4/experiments/run_trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | from algs import trpo 18 | from env_makers import EnvMaker 19 | from models import CategoricalMLPPolicy, MLPBaseline 20 | from utils import SnapshotSaver 21 | import numpy as np 22 | import os 23 | import logger 24 | 25 | log_dir = "data/local/trpo-cartpole" 26 | 27 | np.random.seed(42) 28 | 29 | # Clean up existing logs 30 | os.system("rm -rf {}".format(log_dir)) 31 | 32 | with logger.session(log_dir): 33 | env_maker = EnvMaker('CartPole-v0') 34 | env = env_maker.make() 35 | policy = CategoricalMLPPolicy( 36 | observation_space=env.observation_space, 37 | action_space=env.action_space, 38 | env_spec=env.spec 39 | ) 40 | baseline = MLPBaseline( 41 | observation_space=env.observation_space, 42 | action_space=env.action_space, 43 | env_spec=env.spec 44 | ) 45 | trpo( 46 | env=env, 47 | env_maker=env_maker, 48 | n_envs=16, 49 | policy=policy, 50 | baseline=baseline, 51 | batch_size=2000, 52 | n_iters=100, 53 | snapshot_saver=SnapshotSaver(log_dir) 54 | ) 55 | -------------------------------------------------------------------------------- /lab4/experiments/run_trpo_half_cheetah.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
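Overview: runs TRPO with a Gaussian MLP policy and MLP baseline (tanh, hidden sizes 256 and 64) on RoboschoolHalfCheetah-v1, with batch_size=5000 for 5000 iterations, logging to data/local/trpo-half-cheetah.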
13 | 14 | """ 15 | 16 | 17 | import chainer 18 | 19 | from algs import trpo 20 | from env_makers import EnvMaker 21 | from models import GaussianMLPPolicy, MLPBaseline 22 | from utils import SnapshotSaver 23 | import numpy as np 24 | import os 25 | import logger 26 | 27 | log_dir = "data/local/trpo-half-cheetah" 28 | 29 | np.random.seed(42) 30 | 31 | # Clean up existing logs 32 | os.system("rm -rf {}".format(log_dir)) 33 | 34 | with logger.session(log_dir): 35 | env_maker = EnvMaker('RoboschoolHalfCheetah-v1') 36 | env = env_maker.make() 37 | policy = GaussianMLPPolicy( 38 | observation_space=env.observation_space, 39 | action_space=env.action_space, 40 | env_spec=env.spec, 41 | hidden_sizes=(256, 64), 42 | hidden_nonlinearity=chainer.functions.tanh, 43 | ) 44 | baseline = MLPBaseline( 45 | observation_space=env.observation_space, 46 | action_space=env.action_space, 47 | env_spec=env.spec, 48 | hidden_sizes=(256, 64), 49 | hidden_nonlinearity=chainer.functions.tanh, 50 | ) 51 | trpo( 52 | env=env, 53 | env_maker=env_maker, 54 | n_envs=16, 55 | policy=policy, 56 | baseline=baseline, 57 | batch_size=5000, 58 | n_iters=5000, 59 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 60 | ) 61 | -------------------------------------------------------------------------------- /lab4/experiments/run_trpo_pendulum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
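Overview: runs TRPO with a Gaussian MLP policy and MLP baseline (tanh, hidden sizes 64 and 64) on Pendulum-v0, with batch_size=10000 for 100 iterations, logging to data/local/trpo-pendulum.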
13 | 14 | """ 15 | 16 | 17 | import chainer 18 | 19 | from algs import trpo 20 | from env_makers import EnvMaker 21 | from models import GaussianMLPPolicy, MLPBaseline 22 | from utils import SnapshotSaver 23 | import numpy as np 24 | import os 25 | import logger 26 | 27 | log_dir = "data/local/trpo-pendulum" 28 | 29 | np.random.seed(42) 30 | 31 | # Clean up existing logs 32 | os.system("rm -rf {}".format(log_dir)) 33 | 34 | with logger.session(log_dir): 35 | env_maker = EnvMaker('Pendulum-v0') 36 | env = env_maker.make() 37 | policy = GaussianMLPPolicy( 38 | observation_space=env.observation_space, 39 | action_space=env.action_space, 40 | env_spec=env.spec, 41 | hidden_sizes=(64, 64), 42 | hidden_nonlinearity=chainer.functions.tanh, 43 | ) 44 | baseline = MLPBaseline( 45 | observation_space=env.observation_space, 46 | action_space=env.action_space, 47 | env_spec=env.spec, 48 | hidden_sizes=(64, 64), 49 | hidden_nonlinearity=chainer.functions.tanh, 50 | ) 51 | trpo( 52 | env=env, 53 | env_maker=env_maker, 54 | n_envs=16, 55 | policy=policy, 56 | baseline=baseline, 57 | batch_size=10000, 58 | n_iters=100, 59 | snapshot_saver=SnapshotSaver(log_dir), 60 | ) 61 | -------------------------------------------------------------------------------- /lab4/findport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
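Overview: prints the first free TCP port found by scanning upwards from the given base port in steps of the given increment; the docker_run scripts use it to pick VNC and viskit ports.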
13 | 14 | """ 15 | 16 | # 17 | # Usage: findport.py 3000 100 18 | # 19 | from __future__ import print_function 20 | import socket 21 | from contextlib import closing 22 | import sys 23 | 24 | if len(sys.argv) != 3: 25 | print("Usage: {} ".format(sys.argv[0])) 26 | sys.exit(1) 27 | 28 | base = int(sys.argv[1]) 29 | increment = int(sys.argv[2]) 30 | 31 | 32 | def find_free_port(): 33 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 34 | for port in range(base, 65536, increment): 35 | try: 36 | s.bind(('', port)) 37 | return s.getsockname()[1] 38 | except socket.error: 39 | continue 40 | 41 | 42 | print(find_free_port()) 43 | -------------------------------------------------------------------------------- /lab4/lab4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab4/lab4.pdf -------------------------------------------------------------------------------- /lab4/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /lab4/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
13 | """ 14 | 15 | 16 | import datetime 17 | from collections import OrderedDict 18 | import os 19 | import sys 20 | import shutil 21 | import os.path as osp 22 | import json 23 | 24 | import dateutil.tz 25 | 26 | LOG_OUTPUT_FORMATS = ['stdout', 'log', 'json'] 27 | 28 | DEBUG = 10 29 | INFO = 20 30 | WARN = 30 31 | ERROR = 40 32 | 33 | DISABLED = 50 34 | 35 | 36 | class OutputFormat(object): 37 | def writekvs(self, kvs): 38 | """ 39 | Write key-value pairs 40 | """ 41 | raise NotImplementedError 42 | 43 | def writeseq(self, args): 44 | """ 45 | Write a sequence of other data (e.g. a logging message) 46 | """ 47 | pass 48 | 49 | def close(self): 50 | return 51 | 52 | 53 | class HumanOutputFormat(OutputFormat): 54 | def __init__(self, file): 55 | self.file = file 56 | 57 | def writekvs(self, kvs): 58 | # Create strings for printing 59 | key2str = OrderedDict() 60 | for (key, val) in kvs.items(): 61 | valstr = '%-8.5g' % (val,) if hasattr(val, '__float__') else val 62 | key2str[self._truncate(key)] = self._truncate(valstr) 63 | 64 | # Find max widths 65 | keywidth = max(map(len, key2str.keys())) 66 | valwidth = max(map(len, key2str.values())) 67 | 68 | # Write out the data 69 | dashes = '-' * (keywidth + valwidth + 7) 70 | lines = [dashes] 71 | for (key, val) in key2str.items(): 72 | lines.append('| %s%s | %s%s |' % ( 73 | key, 74 | ' ' * (keywidth - len(key)), 75 | val, 76 | ' ' * (valwidth - len(val)), 77 | )) 78 | lines.append(dashes) 79 | self.file.write('\n'.join(lines) + '\n') 80 | 81 | # Flush the output to the file 82 | self.file.flush() 83 | 84 | def _truncate(self, s): 85 | return s[:20] + '...' if len(s) > 23 else s 86 | 87 | def writeseq(self, args): 88 | for arg in args: 89 | self.file.write(arg) 90 | self.file.write('\n') 91 | self.file.flush() 92 | 93 | 94 | class JSONOutputFormat(OutputFormat): 95 | def __init__(self, file): 96 | self.file = file 97 | 98 | def writekvs(self, kvs): 99 | for k, v in kvs.items(): 100 | if hasattr(v, 'dtype'): 101 | v = v.tolist() 102 | kvs[k] = float(v) 103 | self.file.write(json.dumps(kvs) + '\n') 104 | self.file.flush() 105 | 106 | 107 | def make_output_format(format, ev_dir): 108 | os.makedirs(ev_dir, exist_ok=True) 109 | if format == 'stdout': 110 | return HumanOutputFormat(sys.stdout) 111 | elif format == 'log': 112 | log_file = open(osp.join(ev_dir, 'log.txt'), 'at') 113 | return HumanOutputFormat(log_file) 114 | elif format == 'json': 115 | json_file = open(osp.join(ev_dir, 'progress.json'), 'at') 116 | return JSONOutputFormat(json_file) 117 | else: 118 | raise ValueError('Unknown format specified: %s' % (format,)) 119 | 120 | 121 | # ================================================================ 122 | # API 123 | # ================================================================ 124 | 125 | 126 | def logkv(key, val): 127 | """ 128 | Log a value of some diagnostic 129 | Call this once for each diagnostic quantity, each iteration 130 | """ 131 | Logger.CURRENT.logkv(key, val) 132 | 133 | 134 | def dumpkvs(): 135 | """ 136 | Write all of the diagnostics from the current iteration 137 | 138 | level: int. (see old_logger.py docs) If the global logger level is higher than 139 | the level argument here, don't print to stdout. 
140 | """ 141 | Logger.CURRENT.dumpkvs() 142 | 143 | 144 | # for backwards compatibility 145 | record_tabular = logkv 146 | dump_tabular = dumpkvs 147 | 148 | 149 | def log(*args, level=INFO): 150 | """ 151 | Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). 152 | """ 153 | Logger.CURRENT.log(*args, level=level) 154 | 155 | 156 | def debug(*args): 157 | log(*args, level=DEBUG) 158 | 159 | 160 | def info(*args): 161 | log(*args, level=INFO) 162 | 163 | 164 | def warn(*args): 165 | log(*args, level=WARN) 166 | 167 | 168 | def error(*args): 169 | log(*args, level=ERROR) 170 | 171 | 172 | def set_level(level): 173 | """ 174 | Set logging threshold on current logger. 175 | """ 176 | Logger.CURRENT.set_level(level) 177 | 178 | 179 | def get_level(): 180 | """ 181 | Set logging threshold on current logger. 182 | """ 183 | return Logger.CURRENT.level 184 | 185 | 186 | def get_dir(): 187 | """ 188 | Get directory that log files are being written to. 189 | will be None if there is no output directory (i.e., if you didn't call start) 190 | """ 191 | return Logger.CURRENT.get_dir() 192 | 193 | 194 | def get_expt_dir(): 195 | sys.stderr.write( 196 | "get_expt_dir() is Deprecated. Switch to get_dir() [%s]\n" % (get_dir(),)) 197 | return get_dir() 198 | 199 | 200 | # ================================================================ 201 | # Backend 202 | # ================================================================ 203 | 204 | 205 | class Logger(object): 206 | # A logger with no output files. (See right below class definition) 207 | DEFAULT = None 208 | # So that you can still log to the terminal without setting up any output files 209 | CURRENT = None # Current logger being used by the free functions above 210 | 211 | def __init__(self, dir, output_formats): 212 | self.name2val = OrderedDict() # values this iteration 213 | self.level = INFO 214 | self.dir = dir 215 | self.output_formats = output_formats 216 | 217 | # Logging API, forwarded 218 | # ---------------------------------------- 219 | def logkv(self, key, val): 220 | self.name2val[key] = val 221 | 222 | def dumpkvs(self): 223 | for fmt in self.output_formats: 224 | fmt.writekvs(self.name2val) 225 | self.name2val.clear() 226 | 227 | def log(self, *args, level=INFO): 228 | now = datetime.datetime.now(dateutil.tz.tzlocal()) 229 | timestamp = now.strftime('[%Y-%m-%d %H:%M:%S.%f %Z] ') 230 | if self.level <= level: 231 | self._do_log((timestamp,) + args) 232 | 233 | # Configuration 234 | # ---------------------------------------- 235 | def set_level(self, level): 236 | self.level = level 237 | 238 | def get_dir(self): 239 | return self.dir 240 | 241 | def close(self): 242 | for fmt in self.output_formats: 243 | fmt.close() 244 | 245 | # Misc 246 | # ---------------------------------------- 247 | def _do_log(self, args): 248 | for fmt in self.output_formats: 249 | fmt.writeseq(args) 250 | 251 | 252 | # ================================================================ 253 | 254 | Logger.DEFAULT = Logger( 255 | output_formats=[HumanOutputFormat(sys.stdout)], dir=None) 256 | Logger.CURRENT = Logger.DEFAULT 257 | 258 | 259 | class session(object): 260 | """ 261 | Context manager that sets up the loggers for an experiment. 
262 | """ 263 | 264 | CURRENT = None # Set to a LoggerContext object using enter/exit or context manager 265 | 266 | def __init__(self, dir, format_strs=None): 267 | self.dir = dir 268 | if format_strs is None: 269 | format_strs = LOG_OUTPUT_FORMATS 270 | output_formats = [make_output_format(f, dir) for f in format_strs] 271 | Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) 272 | 273 | def __enter__(self): 274 | os.makedirs(self.evaluation_dir(), exist_ok=True) 275 | output_formats = [make_output_format( 276 | f, self.evaluation_dir()) for f in LOG_OUTPUT_FORMATS] 277 | Logger.CURRENT = Logger(dir=self.dir, output_formats=output_formats) 278 | 279 | def __exit__(self, *args): 280 | Logger.CURRENT.close() 281 | Logger.CURRENT = Logger.DEFAULT 282 | 283 | def evaluation_dir(self): 284 | return self.dir 285 | 286 | 287 | # ================================================================ 288 | 289 | 290 | def _demo(): 291 | info("hi") 292 | debug("shouldn't appear") 293 | set_level(DEBUG) 294 | debug("should appear") 295 | dir = "/tmp/testlogging" 296 | if os.path.exists(dir): 297 | shutil.rmtree(dir) 298 | with session(dir=dir): 299 | record_tabular("a", 3) 300 | record_tabular("b", 2.5) 301 | dump_tabular() 302 | record_tabular("b", -2.5) 303 | record_tabular("a", 5.5) 304 | dump_tabular() 305 | info("^^^ should see a = 5.5") 306 | 307 | record_tabular("b", -2.5) 308 | dump_tabular() 309 | 310 | record_tabular("a", "longasslongasslongasslongasslongasslongassvalue") 311 | dump_tabular() 312 | 313 | 314 | if __name__ == "__main__": 315 | _demo() 316 | -------------------------------------------------------------------------------- /lab4/pg.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | """ 12 | 13 | from alg_utils import * 14 | from simplepg.simple_utils import test_once, nprs 15 | import tests.pg_tests 16 | 17 | 18 | def pg(env, env_maker, policy, baseline, n_envs=mp.cpu_count(), last_iter=-1, n_iters=100, batch_size=1000, 19 | optimizer=chainer.optimizers.Adam(), discount=0.99, gae_lambda=0.97, snapshot_saver=None): 20 | """ 21 | This method implements policy gradient algorithm. 
22 | :param env: An environment instance, which should have the same class as what env_maker.make() returns. 23 | :param env_maker: An object such that calling env_maker.make() will generate a new environment. 24 | :param policy: A stochastic policy which we will be optimizing. 25 | :param baseline: A baseline used for variance reduction and estimating future returns for unfinished trajectories. 26 | :param n_envs: Number of environments running simultaneously. 27 | :param last_iter: The index of the last iteration. This is normally -1 when starting afresh, but may be different when 28 | loaded from a snapshot. 29 | :param n_iters: The total number of iterations to run. 30 | :param batch_size: The number of samples used per iteration. 31 | :param optimizer: A Chainer optimizer instance. By default we use the Adam algorithm with learning rate 1e-3. 32 | :param discount: Discount factor. 33 | :param gae_lambda: Lambda parameter used for generalized advantage estimation. 34 | :param snapshot_saver: An object for saving snapshots. 35 | """ 36 | 37 | if getattr(optimizer, 'target', None) is not policy: 38 | optimizer.setup(policy) 39 | 40 | logger.info("Starting env pool") 41 | with EnvPool(env_maker, n_envs=n_envs) as env_pool: 42 | for iter in range(last_iter + 1, n_iters): 43 | logger.info("Starting iteration {}".format(iter)) 44 | logger.logkv('Iteration', iter) 45 | 46 | logger.info("Start collecting samples") 47 | trajs = parallel_collect_samples(env_pool, policy, batch_size) 48 | 49 | logger.info("Computing input variables for policy optimization") 50 | all_obs, all_acts, all_advs, _ = compute_pg_vars( 51 | trajs, policy, baseline, discount, gae_lambda 52 | ) 53 | 54 | # Begin policy update 55 | 56 | # Now, you need to implement the computation of the policy gradient 57 | # The policy gradient is given by -1/T \sum_t \nabla_\theta(log(p_\theta(a_t|s_t))) * A_t 58 | # Note the negative sign in front, since optimizers are most often minimizing a loss rather than maximizing an objective 59 | # This is the same as \nabla_\theta(-1/T \sum_t log(p_\theta(a_t|s_t)) * A_t) = \nabla_\theta(L), where L is the surrogate loss term 60 | 61 | logger.info("Computing policy gradient") 62 | 63 | # Methods that may be useful: 64 | # - `dists.logli(actions)' returns the log probability of the actions under the distribution `dists'. 65 | # This method returns a chainer variable.
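# Illustrative numeric sketch (not part of the original lab code): with plain
# NumPy the surrogate loss for a batch of T samples would read
#     L = -np.mean(logp_actions * advantages)
# For example, logp_actions = np.array([-1.2, -0.7]) and
# advantages = np.array([2.0, -1.0]) give L = -np.mean([-2.4, 0.7]) = 0.85,
# and minimizing L follows the negated policy gradient written above.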
66 | 67 | dists = policy.compute_dists(all_obs) 68 | 69 | def compute_surr_loss(dists, all_acts, all_advs): 70 | """ 71 | :param dists: An instance of subclass of Distribution 72 | :param all_acts: A chainer variable, which should be a matrix of size N * |A| 73 | :param all_advs: A chainer variable, which should be a vector of size N 74 | :return: A chainer variable, which should be a scalar 75 | """ 76 | "*** YOUR CODE HERE ***" 77 | return -F.mean(dists.logli(all_acts) * all_advs) 78 | 79 | test_once(compute_surr_loss) 80 | 81 | surr_loss = compute_surr_loss(dists, all_acts, all_advs) 82 | 83 | # reset gradients stored in the policy parameters 84 | policy.cleargrads() 85 | surr_loss.backward() 86 | 87 | # apply the computed gradient 88 | optimizer.update() 89 | 90 | # Update baseline 91 | logger.info("Updating baseline") 92 | baseline.update(trajs) 93 | 94 | # log statistics 95 | logger.info("Computing logging information") 96 | logger.logkv('SurrLoss', surr_loss.data) 97 | log_action_distribution_statistics(dists) 98 | log_reward_statistics(env) 99 | log_baseline_statistics(trajs) 100 | logger.dumpkvs() 101 | 102 | if snapshot_saver is not None: 103 | logger.info("Saving snapshot") 104 | snapshot_saver.save_state( 105 | iter, 106 | dict( 107 | alg=pg, 108 | alg_state=dict( 109 | env_maker=env_maker, 110 | policy=policy, 111 | baseline=baseline, 112 | n_envs=n_envs, 113 | last_iter=iter, 114 | n_iters=n_iters, 115 | batch_size=batch_size, 116 | optimizer=optimizer, 117 | discount=discount, 118 | gae_lambda=gae_lambda 119 | ) 120 | ) 121 | ) 122 | -------------------------------------------------------------------------------- /lab4/pong_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab4/pong_warm_start.pkl -------------------------------------------------------------------------------- /lab4/scripts/resume_training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
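The pg() function above is driven by an experiment script in the same way as trpo(); the sketch below mirrors experiments/run_trpo_pendulum.py but calls the policy gradient algorithm instead. It is assembled from pieces shown in this listing (EnvMaker, GaussianMLPPolicy, MLPBaseline, SnapshotSaver, logger.session); the log directory name and the hyperparameters batch_size=2000, n_iters=100 are illustrative choices, not taken from the bundled run_pg_cartpole.py.

import chainer
import numpy as np

import logger
from env_makers import EnvMaker
from models import GaussianMLPPolicy, MLPBaseline
from pg import pg  # imported straight from pg.py for this sketch
from utils import SnapshotSaver

log_dir = "data/local/pg-pendulum"
np.random.seed(42)

with logger.session(log_dir):
    env_maker = EnvMaker('Pendulum-v0')
    env = env_maker.make()
    policy = GaussianMLPPolicy(
        observation_space=env.observation_space,
        action_space=env.action_space,
        env_spec=env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=chainer.functions.tanh,
    )
    baseline = MLPBaseline(
        observation_space=env.observation_space,
        action_space=env.action_space,
        env_spec=env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=chainer.functions.tanh,
    )
    pg(
        env=env,
        env_maker=env_maker,
        n_envs=16,
        policy=policy,
        baseline=baseline,
        batch_size=2000,
        n_iters=100,
        snapshot_saver=SnapshotSaver(log_dir),
    )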
13 | 14 | """ 15 | 16 | 17 | from utils import SnapshotSaver 18 | import click 19 | import logger 20 | 21 | 22 | @click.command() 23 | @click.argument("dir") # , "Directory which contains snapshot files") 24 | @click.option("--interval", help="Interval between saving snapshots", type=int, default=10) 25 | def main(dir, interval): 26 | with logger.session(dir): 27 | saver = SnapshotSaver(dir, interval=interval) 28 | state = saver.get_state() 29 | alg_state = state['alg_state'] 30 | env = alg_state['env_maker'].make() 31 | alg = state['alg'] 32 | alg(env=env, snapshot_saver=saver, **alg_state) 33 | 34 | 35 | if __name__ == "__main__": 36 | main() 37 | -------------------------------------------------------------------------------- /lab4/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /lab4/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | from utils import SnapshotSaver 18 | import click 19 | import time 20 | import os 21 | 22 | 23 | @click.command() 24 | @click.argument("dir") 25 | def main(dir): 26 | env = None 27 | while True: 28 | saver = SnapshotSaver(dir) 29 | state = saver.get_state() 30 | if state is None: 31 | time.sleep(1) 32 | continue 33 | alg_state = state['alg_state'] 34 | if env is None: 35 | env = alg_state['env_maker'].make() 36 | policy = alg_state['policy'] 37 | ob = env.reset() 38 | done = False 39 | while not done: 40 | action, _ = policy.get_action(ob) 41 | ob, _, done, _ = env.step(action) 42 | env.render() 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /lab4/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
13 | 14 | """ 15 | 16 | 17 | def main(): 18 | import roboschool 19 | import gym 20 | import chainer 21 | env = gym.make('CartPole-v0') 22 | env.reset() 23 | env.step(env.action_space.sample()) 24 | env = gym.make('RoboschoolHalfCheetah-v1') 25 | env.reset() 26 | env.step(env.action_space.sample()) 27 | print("Your environment has been successfully set up!") 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /lab4/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | from gym import Env 16 | from gym.envs.registration import register 17 | from gym.utils import seeding 18 | from gym import spaces 19 | from gym.envs.classic_control.cartpole import CartPoleEnv 20 | import numpy as np 21 | 22 | 23 | class PointEnv(Env): 24 | metadata = { 25 | 'render.modes': ['human', 'rgb_array'], 26 | 'video.frames_per_second': 50 27 | } 28 | 29 | def __init__(self): 30 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 31 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 32 | 33 | self._seed() 34 | self.viewer = None 35 | self.state = None 36 | 37 | def _seed(self, seed=None): 38 | self.np_random, seed = seeding.np_random(seed) 39 | return [seed] 40 | 41 | def _step(self, action): 42 | action = np.clip(action, -0.025, 0.025) 43 | self.state = np.clip(self.state + action, -1, 1) 44 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 45 | 46 | def _reset(self): 47 | while True: 48 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 49 | # Sample states that are far away 50 | if np.linalg.norm(self.state) > 0.9: 51 | break 52 | return np.array(self.state) 53 | 54 | # def _render(self, mode='human', close=False): 55 | # pass 56 | 57 | def _render(self, mode='human', close=False): 58 | if close: 59 | if self.viewer is not None: 60 | self.viewer.close() 61 | self.viewer = None 62 | return 63 | 64 | screen_width = 800 65 | screen_height = 800 66 | 67 | if self.viewer is None: 68 | from gym.envs.classic_control import rendering 69 | self.viewer = rendering.Viewer(screen_width, screen_height) 70 | 71 | agent = rendering.make_circle( 72 | min(screen_height, screen_width) * 0.03) 73 | origin = rendering.make_circle( 74 | min(screen_height, screen_width) * 0.03) 75 | trans = rendering.Transform(translation=(0, 0)) 76 | agent.add_attr(trans) 77 | self.trans = trans 78 | agent.set_color(1, 0, 0) 79 | origin.set_color(0, 0, 0) 80 | origin.add_attr(rendering.Transform( 81 | translation=(screen_width // 2, screen_height // 2))) 82 | self.viewer.add_geom(agent) 83 | self.viewer.add_geom(origin) 84 | 85 | # self.trans.set_translation(0, 0) 86 | self.trans.set_translation( 87 | (self.state[0] + 1) / 2 * screen_width, 88 | (self.state[1] + 1) / 2 * screen_height, 89 | ) 90 | 91 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 92 | 93 | 94 | register( 95 | 'Point-v0', 96 | entry_point='simplepg.point_env:PointEnv', 97 | timestep_limit=40, 98 | ) 99 | -------------------------------------------------------------------------------- /lab4/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import click 18 | import numpy as np 19 | import gym 20 | 21 | from simplepg.simple_utils import include_bias, weighted_sample 22 | 23 | 24 | def point_get_action(theta, ob, rng=np.random): 25 | ob_1 = include_bias(ob) 26 | mean = theta.dot(ob_1) 27 | return rng.normal(loc=mean, scale=1.) 28 | 29 | 30 | def cartpole_get_action(theta, ob, rng=np.random): 31 | ob_1 = include_bias(ob) 32 | logits = ob_1.dot(theta.T) 33 | return weighted_sample(logits, rng=rng) 34 | 35 | 36 | @click.command() 37 | @click.argument("env_id", type=str, default="Point-v0") 38 | def main(env_id): 39 | # Register the environment 40 | rng = np.random.RandomState(42) 41 | 42 | if env_id == 'CartPole-v0': 43 | env = gym.make('CartPole-v0') 44 | get_action = cartpole_get_action 45 | obs_dim = env.observation_space.shape[0] 46 | action_dim = env.action_space.n 47 | elif env_id == 'Point-v0': 48 | from simplepg import point_env 49 | env = gym.make('Point-v0') 50 | get_action = point_get_action 51 | obs_dim = env.observation_space.shape[0] 52 | action_dim = env.action_space.shape[0] 53 | else: 54 | raise ValueError( 55 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 56 | 57 | env.seed(42) 58 | 59 | # Initialize parameters 60 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 61 | 62 | while True: 63 | ob = env.reset() 64 | done = False 65 | # Only render the first trajectory 66 | # Collect a new trajectory 67 | rewards = [] 68 | while not done: 69 | action = get_action(theta, ob, rng=rng) 70 | next_ob, rew, done, _ = env.step(action) 71 | ob = next_ob 72 | env.render() 73 | rewards.append(rew) 74 | 75 | print("Episode reward: %.2f" % np.sum(rewards)) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /lab4/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 
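To make the linear policies above concrete, here is a small illustrative sketch (toy numbers, not from the lab) of how point_get_action turns a parameter matrix theta of shape (action_dim, obs_dim + 1) into a 2-D Gaussian action:

import numpy as np
from simplepg.simple_utils import include_bias

rng = np.random.RandomState(0)
theta = rng.normal(scale=0.01, size=(2, 3))   # action_dim=2, obs_dim=2, plus one bias column
ob = np.array([0.5, -0.25])
mean = theta.dot(include_bias(ob))            # include_bias appends a constant 1.0 feature
action = rng.normal(loc=mean, scale=1.0)      # unit-variance Gaussian around the mean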
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | import numpy as np 16 | import scipy.special 17 | import chainer 18 | 19 | 20 | # Compute gradient approximately using finite difference 21 | def numerical_grad(f, x, eps=1e-8): 22 | grad = np.zeros_like(x) 23 | for i in range(len(x)): 24 | xplus = np.array(x) 25 | xplus[i] += eps 26 | fplus = f(xplus) 27 | xminus = np.array(x) 28 | xminus[i] -= eps 29 | fminus = f(xminus) 30 | grad[i] = (fplus - fminus) / (2 * eps) 31 | return grad 32 | 33 | 34 | def gradient_check(f, g, x): 35 | # Test the implementation of g(x) = df/dx 36 | # Perform numerical differentiation and test it 37 | g_num = numerical_grad(f, x) 38 | g_test = g(x) 39 | try: 40 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 41 | print("Gradient check passed!") 42 | except AssertionError as e: 43 | print(e) 44 | print("Error: Gradient check didn't pass!") 45 | exit() 46 | 47 | 48 | def log_softmax(logits): 49 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 50 | 51 | 52 | def softmax(logits): 53 | x = logits 54 | x = x - np.max(x, axis=-1, keepdims=True) 55 | x = np.exp(x) 56 | return x / np.sum(x, axis=-1, keepdims=True) 57 | 58 | 59 | def weighted_sample(logits, rng=np.random): 60 | weights = softmax(logits) 61 | return min( 62 | int(np.sum(rng.uniform() > np.cumsum(weights))), 63 | len(weights) - 1 64 | ) 65 | 66 | 67 | def include_bias(x): 68 | # Add a constant term (1.0) to each entry in x 69 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 70 | 71 | 72 | _tested = set() 73 | _tests = dict() 74 | 75 | nprs = np.random.RandomState 76 | 77 | 78 | def register_test(fn_name, kwargs, desired_output=None): 79 | assert fn_name not in _tests 80 | _tests[fn_name] = (kwargs, desired_output) 81 | 82 | 83 | def assert_allclose(a, b): 84 | if isinstance(a, (np.ndarray, float, int)): 85 | np.testing.assert_allclose(a, b, rtol=1e-5) 86 | elif isinstance(a, (tuple, list)): 87 | assert isinstance(b, (tuple, list)) 88 | assert len(a) == len(b) 89 | for a_i, b_i in zip(a, b): 90 | assert_allclose(a_i, b_i) 91 | elif isinstance(a, chainer.Variable): 92 | assert isinstance(b, chainer.Variable) 93 | assert_allclose(a.data, b.data) 94 | else: 95 | raise NotImplementedError 96 | 97 | 98 | def test_once(fn): 99 | module = fn.__module__ 100 | name = fn.__name__ 101 | key = module + "." + name 102 | if key in _tested: 103 | return 104 | assert key in _tests, "Test for %s not found!" 
% key 105 | kwargs, desired_output = _tests[key] 106 | _tested.add(key) 107 | 108 | if callable(kwargs): 109 | kwargs = kwargs() 110 | 111 | if callable(desired_output): 112 | desired_output = desired_output() 113 | 114 | if desired_output is None: 115 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 116 | exit() 117 | else: 118 | try: 119 | output = fn(**kwargs) 120 | assert_allclose(desired_output, output) 121 | print("Test for %s passed!" % key) 122 | except AssertionError as e: 123 | print(e) 124 | print("Error: test for %s didn't pass!" % key) 125 | exit() 126 | -------------------------------------------------------------------------------- /lab4/tests/a2c_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
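As a quick illustration of the finite-difference utilities defined in simplepg/simple_utils.py above, the following sketch (a toy function, not part of the labs) checks an analytic gradient against numerical_grad via gradient_check:

import numpy as np
from simplepg.simple_utils import gradient_check

def f(x):
    return np.sum(x ** 2)   # f(x) = sum_i x_i^2

def g(x):
    return 2 * x            # its analytic gradient

gradient_check(f, g, np.array([0.3, -1.2, 2.0]))  # prints "Gradient check passed!"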
11 | 12 | """ 13 | 14 | 15 | from simplepg.simple_utils import register_test, nprs 16 | import numpy as np 17 | from chainer import Variable 18 | 19 | register_test( 20 | "a2c.compute_returns_advantages", 21 | kwargs=lambda: dict( 22 | rewards=nprs(0).uniform(size=(5, 2)), 23 | dones=nprs(1).choice([True, False], size=(5, 2)), 24 | values=nprs(2).uniform(size=(5, 2)), 25 | next_values=nprs(3).uniform(size=(2,)), 26 | discount=0.99, 27 | ), 28 | desired_output=lambda: ( 29 | np.array([[1.14554925, 1.25462372], 30 | [0.60276338, 0.54488318], 31 | [2.33579066, 1.90456042], 32 | [1.93145037, 1.2713801], 33 | [1.50895268, 0.38344152]]), 34 | np.array([[0.70955434, 1.22869749], 35 | [0.0531009, 0.10956079], 36 | [1.91542286, 1.5742256], 37 | [1.72680173, 0.65210914], 38 | [1.20929801, 0.11661424]]) 39 | ) 40 | ) 41 | 42 | register_test( 43 | "a2c.compute_total_loss", 44 | kwargs=lambda: dict( 45 | logli=Variable(nprs(0).uniform(size=(10,)).astype(np.float32)), 46 | all_advs=Variable(nprs(1).uniform(size=(10,)).astype(np.float32)), 47 | ent_coeff=nprs(2).uniform(), 48 | ent=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 49 | vf_loss_coeff=nprs(4).uniform(), 50 | all_returns=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 51 | all_values=Variable(nprs(6).uniform(size=(10,)).astype(np.float32)), 52 | ), 53 | desired_output=lambda: ( 54 | Variable(np.array(-0.4047563076019287, dtype=np.float32)), 55 | Variable(np.array(0.22883716225624084, dtype=np.float32)), 56 | Variable(np.array(-0.1834639459848404, dtype=np.float32)) 57 | ) 58 | ) 59 | -------------------------------------------------------------------------------- /lab4/tests/pg_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | from chainer import Variable 16 | 17 | from simplepg.simple_utils import register_test, nprs 18 | from utils import Gaussian 19 | import numpy as np 20 | 21 | register_test( 22 | "pg.compute_surr_loss", 23 | kwargs=lambda: dict( 24 | dists=Gaussian( 25 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 26 | log_stds=Variable(nprs(1).uniform( 27 | size=(10, 3)).astype(np.float32)), 28 | ), 29 | all_acts=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 30 | all_advs=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 31 | ), 32 | desired_output=lambda: Variable( 33 | np.array(1.9201269149780273, dtype=np.float32)) 34 | ) 35 | -------------------------------------------------------------------------------- /lab4/tests/simplepg_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | from simplepg.simple_utils import register_test, nprs 16 | import numpy as np 17 | 18 | register_test( 19 | "__main__.compute_update", 20 | kwargs=lambda: dict( 21 | discount=0.99, 22 | R_tplus1=1.0, 23 | theta=nprs(0).uniform(size=(2, 2)), 24 | s_t=nprs(1).uniform(size=(1,)), 25 | a_t=nprs(2).choice(2), 26 | r_t=nprs(3).uniform(), 27 | b_t=nprs(4).uniform(), 28 | get_grad_logp_action=lambda theta, *_: theta * 2 29 | ), 30 | desired_output=lambda: ( 31 | 1.5407979025745755, 32 | np.array([[0.62978332, 0.82070564], [0.69169275, 0.62527314]]) 33 | ) 34 | ) 35 | 36 | register_test( 37 | "__main__.compute_baselines", 38 | kwargs=lambda: dict( 39 | all_returns=[ 40 | nprs(0).uniform(size=(10,)), 41 | nprs(1).uniform(size=(20,)), 42 | [], 43 | ], 44 | ), 45 | desired_output=lambda: np.array([0.61576628, 0.36728075, 0.]) 46 | ) 47 | 48 | register_test( 49 | "__main__.compute_fisher_matrix", 50 | kwargs=lambda: dict( 51 | theta=nprs(1).uniform(size=(2, 2)), 52 | get_grad_logp_action=lambda theta, ob, action: np.exp( 53 | theta) * np.linalg.norm(action), 54 | all_observations=list(nprs(2).uniform(size=(5, 1))), 55 | all_actions=list(nprs(3).choice(2, size=(5,))), 56 | ), 57 | desired_output=lambda: np.array([[0.92104469, 1.24739299, 0.60704379, 0.82124306], 58 | [1.24739299, 1.68937435, 59 | 0.82213401, 1.11222925], 60 | [0.60704379, 0.82213401, 61 | 0.40009151, 0.54126635], 62 | [0.82124306, 1.11222925, 0.54126635, 0.73225564]]) 63 | ) 64 | 65 | register_test( 66 | "__main__.compute_natural_gradient", 67 | kwargs=lambda: dict( 68 | F=nprs(0).uniform(size=(4, 4)), 69 | grad=nprs(1).uniform(size=(2, 2)), 70 | reg=1e-3, 71 | ), 72 | desired_output=lambda: np.array( 73 | [[-0.44691565, 0.5477328], [-0.20366472, 0.72267091]]) 74 | ) 75 | 76 | register_test( 77 | "__main__.compute_step_size", 78 | kwargs=lambda: dict( 79 | F=nprs(0).uniform(size=(2, 2)), 80 | natural_grad=nprs(1).uniform(size=(1, 2)), 81 | natural_step_size=1e-2, 82 | ), 83 | desired_output=lambda: 0.1607407366467048, 84 | ) 85 | -------------------------------------------------------------------------------- /lab4/tests/trpo_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | from simplepg.simple_utils import register_test, nprs 16 | import numpy as np 17 | from chainer import Variable 18 | 19 | from utils import Gaussian 20 | 21 | register_test( 22 | "trpo.compute_surr_loss", 23 | kwargs=lambda: dict( 24 | old_dists=Gaussian( 25 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 26 | log_stds=Variable(nprs(1).uniform( 27 | size=(10, 3)).astype(np.float32)), 28 | ), 29 | new_dists=Gaussian( 30 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 31 | log_stds=Variable(nprs(3).uniform( 32 | size=(10, 3)).astype(np.float32)), 33 | ), 34 | all_acts=Variable(nprs(4).uniform(size=(10, 3)).astype(np.float32)), 35 | all_advs=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 36 | ), 37 | desired_output=lambda: Variable( 38 | np.array(-0.5629823207855225, dtype=np.float32)) 39 | ) 40 | 41 | register_test( 42 | "trpo.compute_kl", 43 | kwargs=lambda: dict( 44 | old_dists=Gaussian( 45 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 46 | log_stds=Variable(nprs(1).uniform( 47 | size=(10, 3)).astype(np.float32)), 48 | ), 49 | new_dists=Gaussian( 50 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 51 | log_stds=Variable(nprs(3).uniform( 52 | size=(10, 3)).astype(np.float32)), 53 | ), 54 | ), 55 | desired_output=lambda: Variable( 56 | np.array(0.5306503176689148, dtype=np.float32)) 57 | ) 58 | -------------------------------------------------------------------------------- /lab4/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab4/viskit/__init__.py -------------------------------------------------------------------------------- /lab4/viskit/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
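The simplepg tests registered further above fix the natural gradient computations numerically. The NumPy sketch below is consistent with them: the step-size formula reproduces the registered desired value for compute_step_size, while the damped linear solve in compute_natural_gradient is an assumption about how the reg parameter enters (it is not the lab's reference implementation):

import numpy as np

def compute_natural_gradient(F, grad, reg=1e-3):
    # Damped solve (F + reg*I) x = grad, reshaped back to the parameter shape.
    flat = np.linalg.solve(F + reg * np.eye(F.shape[0]), grad.flatten())
    return flat.reshape(grad.shape)

def compute_step_size(F, natural_grad, natural_step_size):
    # Largest beta such that the quadratic KL estimate
    # 0.5 * (beta*g)^T F (beta*g) equals natural_step_size.
    g = natural_grad.flatten()
    return np.sqrt(2.0 * natural_step_size / g.dot(F).dot(g))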
11 | """ 12 | 13 | 14 | import csv 15 | import itertools 16 | import json 17 | import os 18 | 19 | import numpy as np 20 | 21 | 22 | # from sandbox.rocky.utils.py_utils import AttrDict 23 | 24 | class AttrDict(dict): 25 | def __init__(self, *args, **kwargs): 26 | super(AttrDict, self).__init__(*args, **kwargs) 27 | self.__dict__ = self 28 | 29 | 30 | def unique(l): 31 | return list(set(l)) 32 | 33 | 34 | def flatten(l): 35 | return [item for sublist in l for item in sublist] 36 | 37 | 38 | def load_progress(progress_json_path, verbose=True): 39 | if verbose: 40 | print("Reading %s" % progress_json_path) 41 | entries = dict() 42 | rows = [] 43 | with open(progress_json_path, 'r') as f: 44 | lines = f.read().split('\n') 45 | for line in lines: 46 | if len(line) > 0: 47 | row = json.loads(line) 48 | rows.append(row) 49 | all_keys = set(k for row in rows for k in row.keys()) 50 | for k in all_keys: 51 | if k not in entries: 52 | entries[k] = [] 53 | for row in rows: 54 | if k in row: 55 | v = row[k] 56 | try: 57 | entries[k].append(float(v)) 58 | except: 59 | entries[k].append(np.nan) 60 | else: 61 | entries[k].append(np.nan) 62 | 63 | # entries[key] = [row.get(key, np.nan) for row in rows] 64 | # added_keys = set() 65 | # for k, v in row.items(): 66 | # if k not in entries: 67 | # entries[k] = [] 68 | # try: 69 | # entries[k].append(float(v)) 70 | # except: 71 | # entries[k].append(0.) 72 | # added_keys.add(k) 73 | # for k in entries.keys(): 74 | # if k not in added_keys: 75 | # entries[k].append(np.nan) 76 | entries = dict([(k, np.array(v)) for k, v in entries.items()]) 77 | return entries 78 | 79 | 80 | def flatten_dict(d): 81 | flat_params = dict() 82 | for k, v in d.items(): 83 | if isinstance(v, dict): 84 | v = flatten_dict(v) 85 | for subk, subv in flatten_dict(v).items(): 86 | flat_params[k + "." 
+ subk] = subv 87 | else: 88 | flat_params[k] = v 89 | return flat_params 90 | 91 | 92 | def load_params(params_json_path): 93 | with open(params_json_path, 'r') as f: 94 | data = json.loads(f.read()) 95 | if "args_data" in data: 96 | del data["args_data"] 97 | if "exp_name" not in data: 98 | data["exp_name"] = params_json_path.split("/")[-2] 99 | return data 100 | 101 | 102 | def lookup(d, keys): 103 | if not isinstance(keys, list): 104 | keys = keys.split(".") 105 | for k in keys: 106 | if hasattr(d, "__getitem__"): 107 | if k in d: 108 | d = d[k] 109 | else: 110 | return None 111 | else: 112 | return None 113 | return d 114 | 115 | 116 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True): 117 | if isinstance(exp_folder_paths, str): 118 | exp_folder_paths = [exp_folder_paths] 119 | exps = [] 120 | for exp_folder_path in exp_folder_paths: 121 | exps += [x[0] for x in os.walk(exp_folder_path)] 122 | if verbose: 123 | print("finished walking exp folders") 124 | exps_data = [] 125 | for exp in exps: 126 | try: 127 | exp_path = exp 128 | variant_json_path = os.path.join(exp_path, "variant.json") 129 | progress_json_path = os.path.join(exp_path, "progress.json") 130 | progress = load_progress(progress_json_path, verbose=verbose) 131 | try: 132 | params = load_params(variant_json_path) 133 | except IOError: 134 | params = dict(exp_name="experiment") 135 | exps_data.append(AttrDict( 136 | progress=progress, params=params, flat_params=flatten_dict(params))) 137 | except IOError as e: 138 | if verbose: 139 | print(e) 140 | 141 | # a dictionary of all keys and types of values 142 | all_keys = dict() 143 | for data in exps_data: 144 | for key in data.flat_params.keys(): 145 | if key not in all_keys: 146 | all_keys[key] = type(data.flat_params[key]) 147 | 148 | # if any data does not have some key, specify the value of it 149 | if not ignore_missing_keys: 150 | default_values = dict() 151 | for data in exps_data: 152 | for key in sorted(all_keys.keys()): 153 | if key not in data.flat_params: 154 | if key not in default_values: 155 | default = None 156 | default_values[key] = default 157 | data.flat_params[key] = default_values[key] 158 | 159 | return exps_data 160 | 161 | 162 | def smart_repr(x): 163 | if isinstance(x, tuple): 164 | if len(x) == 0: 165 | return "tuple()" 166 | elif len(x) == 1: 167 | return "(%s,)" % smart_repr(x[0]) 168 | else: 169 | return "(" + ",".join(map(smart_repr, x)) + ")" 170 | else: 171 | if hasattr(x, "__call__"): 172 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." 
+ x.__name__) 173 | else: 174 | return repr(x) 175 | 176 | 177 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 178 | try: 179 | stringified_pairs = sorted( 180 | map( 181 | eval, 182 | unique( 183 | flatten( 184 | [ 185 | list( 186 | map( 187 | smart_repr, 188 | list(d.flat_params.items()) 189 | ) 190 | ) 191 | for d in exps_data 192 | ] 193 | ) 194 | ) 195 | ), 196 | key=lambda x: ( 197 | tuple("" if it is None else str(it) for it in x), 198 | ) 199 | ) 200 | except Exception as e: 201 | print(e) 202 | import ipdb 203 | ipdb.set_trace() 204 | proposals = [(k, [x[1] for x in v]) 205 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 206 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 207 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 208 | return filtered 209 | 210 | 211 | class Selector(object): 212 | def __init__(self, exps_data, filters=None, custom_filters=None): 213 | self._exps_data = exps_data 214 | if filters is None: 215 | self._filters = tuple() 216 | else: 217 | self._filters = tuple(filters) 218 | if custom_filters is None: 219 | self._custom_filters = [] 220 | else: 221 | self._custom_filters = custom_filters 222 | 223 | def where(self, k, v): 224 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 225 | 226 | def custom_filter(self, filter): 227 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 228 | 229 | def _check_exp(self, exp): 230 | # or exp.flat_params.get(k, None) is None 231 | return all( 232 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 233 | k not in exp.flat_params)) for k, v in self._filters) 234 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 235 | 236 | def extract(self): 237 | return list(filter(self._check_exp, self._exps_data)) 238 | 239 | def iextract(self): 240 | return filter(self._check_exp, self._exps_data) 241 | 242 | 243 | # Taken from plot.ly 244 | color_defaults = [ 245 | '#1f77b4', # muted blue 246 | '#ff7f0e', # safety orange 247 | '#2ca02c', # cooked asparagus green 248 | '#d62728', # brick red 249 | '#9467bd', # muted purple 250 | '#8c564b', # chestnut brown 251 | '#e377c2', # raspberry yogurt pink 252 | '#7f7f7f', # middle gray 253 | '#bcbd22', # curry yellow-green 254 | '#17becf' # blue-teal 255 | ] 256 | 257 | 258 | def hex_to_rgb(hex, opacity=1.0): 259 | if hex[0] == '#': 260 | hex = hex[1:] 261 | assert (len(hex) == 6) 262 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 263 | -------------------------------------------------------------------------------- /lab4/viskit/static/css/dropdowns-enhancement.css: -------------------------------------------------------------------------------- 1 | .dropdown-menu > li > label { 2 | display: block; 3 | padding: 3px 20px; 4 | clear: both; 5 | font-weight: normal; 6 | line-height: 1.42857143; 7 | color: #333333; 8 | white-space: nowrap; 9 | } 10 | .dropdown-menu > li > label:hover, 11 | .dropdown-menu > li > label:focus { 12 | text-decoration: none; 13 | color: #262626; 14 | background-color: #f5f5f5; 15 | } 16 | .dropdown-menu > li > input:checked ~ label, 17 | .dropdown-menu > li > input:checked ~ label:hover, 18 | .dropdown-menu > li > input:checked ~ label:focus, 19 | .dropdown-menu > .active > label, 20 | .dropdown-menu > .active > label:hover, 21 | .dropdown-menu > .active > label:focus { 22 | color: #ffffff; 23 | text-decoration: none; 
24 | outline: 0; 25 | background-color: #428bca; 26 | } 27 | .dropdown-menu > li > input[disabled] ~ label, 28 | .dropdown-menu > li > input[disabled] ~ label:hover, 29 | .dropdown-menu > li > input[disabled] ~ label:focus, 30 | .dropdown-menu > .disabled > label, 31 | .dropdown-menu > .disabled > label:hover, 32 | .dropdown-menu > .disabled > label:focus { 33 | color: #999999; 34 | } 35 | .dropdown-menu > li > input[disabled] ~ label:hover, 36 | .dropdown-menu > li > input[disabled] ~ label:focus, 37 | .dropdown-menu > .disabled > label:hover, 38 | .dropdown-menu > .disabled > label:focus { 39 | text-decoration: none; 40 | background-color: transparent; 41 | background-image: none; 42 | filter: progid:DXImageTransform.Microsoft.gradient(enabled = false); 43 | cursor: not-allowed; 44 | } 45 | .dropdown-menu > li > label { 46 | margin-bottom: 0; 47 | cursor: pointer; 48 | } 49 | .dropdown-menu > li > input[type="radio"], 50 | .dropdown-menu > li > input[type="checkbox"] { 51 | display: none; 52 | position: absolute; 53 | top: -9999em; 54 | left: -9999em; 55 | } 56 | .dropdown-menu > li > label:focus, 57 | .dropdown-menu > li > input:focus ~ label { 58 | outline: thin dotted; 59 | outline: 5px auto -webkit-focus-ring-color; 60 | outline-offset: -2px; 61 | } 62 | .dropdown-menu.pull-right { 63 | right: 0; 64 | left: auto; 65 | } 66 | .dropdown-menu.pull-top { 67 | bottom: 100%; 68 | top: auto; 69 | margin: 0 0 2px; 70 | -webkit-box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 71 | box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 72 | } 73 | .dropdown-menu.pull-center { 74 | right: 50%; 75 | left: auto; 76 | } 77 | .dropdown-menu.pull-middle { 78 | right: 100%; 79 | margin: 0 2px 0 0; 80 | box-shadow: -5px 0 10px rgba(0, 0, 0, 0.2); 81 | left: auto; 82 | } 83 | .dropdown-menu.pull-middle.pull-right { 84 | right: auto; 85 | left: 100%; 86 | margin: 0 0 0 2px; 87 | box-shadow: 5px 0 10px rgba(0, 0, 0, 0.2); 88 | } 89 | .dropdown-menu.pull-middle.pull-center { 90 | right: 50%; 91 | margin: 0; 92 | box-shadow: 0 0 10px rgba(0, 0, 0, 0.2); 93 | } 94 | .dropdown-menu.bullet { 95 | margin-top: 8px; 96 | } 97 | .dropdown-menu.bullet:before { 98 | width: 0; 99 | height: 0; 100 | content: ''; 101 | display: inline-block; 102 | position: absolute; 103 | border-color: transparent; 104 | border-style: solid; 105 | -webkit-transform: rotate(360deg); 106 | border-width: 0 7px 7px; 107 | border-bottom-color: #cccccc; 108 | border-bottom-color: rgba(0, 0, 0, 0.15); 109 | top: -7px; 110 | left: 9px; 111 | } 112 | .dropdown-menu.bullet:after { 113 | width: 0; 114 | height: 0; 115 | content: ''; 116 | display: inline-block; 117 | position: absolute; 118 | border-color: transparent; 119 | border-style: solid; 120 | -webkit-transform: rotate(360deg); 121 | border-width: 0 6px 6px; 122 | border-bottom-color: #ffffff; 123 | top: -6px; 124 | left: 10px; 125 | } 126 | .dropdown-menu.bullet.pull-right:before { 127 | left: auto; 128 | right: 9px; 129 | } 130 | .dropdown-menu.bullet.pull-right:after { 131 | left: auto; 132 | right: 10px; 133 | } 134 | .dropdown-menu.bullet.pull-top { 135 | margin-top: 0; 136 | margin-bottom: 8px; 137 | } 138 | .dropdown-menu.bullet.pull-top:before { 139 | top: auto; 140 | bottom: -7px; 141 | border-bottom-width: 0; 142 | border-top-width: 7px; 143 | border-top-color: #cccccc; 144 | border-top-color: rgba(0, 0, 0, 0.15); 145 | } 146 | .dropdown-menu.bullet.pull-top:after { 147 | top: auto; 148 | bottom: -6px; 149 | border-bottom: none; 150 | border-top-width: 6px; 151 | border-top-color: 
#ffffff; 152 | } 153 | .dropdown-menu.bullet.pull-center:before { 154 | left: auto; 155 | right: 50%; 156 | margin-right: -7px; 157 | } 158 | .dropdown-menu.bullet.pull-center:after { 159 | left: auto; 160 | right: 50%; 161 | margin-right: -6px; 162 | } 163 | .dropdown-menu.bullet.pull-middle { 164 | margin-right: 8px; 165 | } 166 | .dropdown-menu.bullet.pull-middle:before { 167 | top: 50%; 168 | left: 100%; 169 | right: auto; 170 | margin-top: -7px; 171 | border-right-width: 0; 172 | border-bottom-color: transparent; 173 | border-top-width: 7px; 174 | border-left-color: #cccccc; 175 | border-left-color: rgba(0, 0, 0, 0.15); 176 | } 177 | .dropdown-menu.bullet.pull-middle:after { 178 | top: 50%; 179 | left: 100%; 180 | right: auto; 181 | margin-top: -6px; 182 | border-right-width: 0; 183 | border-bottom-color: transparent; 184 | border-top-width: 6px; 185 | border-left-color: #ffffff; 186 | } 187 | .dropdown-menu.bullet.pull-middle.pull-right { 188 | margin-right: 0; 189 | margin-left: 8px; 190 | } 191 | .dropdown-menu.bullet.pull-middle.pull-right:before { 192 | left: -7px; 193 | border-left-width: 0; 194 | border-right-width: 7px; 195 | border-right-color: #cccccc; 196 | border-right-color: rgba(0, 0, 0, 0.15); 197 | } 198 | .dropdown-menu.bullet.pull-middle.pull-right:after { 199 | left: -6px; 200 | border-left-width: 0; 201 | border-right-width: 6px; 202 | border-right-color: #ffffff; 203 | } 204 | .dropdown-menu.bullet.pull-middle.pull-center { 205 | margin-left: 0; 206 | margin-right: 0; 207 | } 208 | .dropdown-menu.bullet.pull-middle.pull-center:before { 209 | border: none; 210 | display: none; 211 | } 212 | .dropdown-menu.bullet.pull-middle.pull-center:after { 213 | border: none; 214 | display: none; 215 | } 216 | .dropdown-submenu { 217 | position: relative; 218 | } 219 | .dropdown-submenu > .dropdown-menu { 220 | top: 0; 221 | left: 100%; 222 | margin-top: -6px; 223 | margin-left: -1px; 224 | border-top-left-radius: 0; 225 | } 226 | .dropdown-submenu > a:before { 227 | display: block; 228 | float: right; 229 | width: 0; 230 | height: 0; 231 | content: ""; 232 | margin-top: 6px; 233 | margin-right: -8px; 234 | border-width: 4px 0 4px 4px; 235 | border-style: solid; 236 | border-left-style: dashed; 237 | border-top-color: transparent; 238 | border-bottom-color: transparent; 239 | } 240 | @media (max-width: 767px) { 241 | .navbar-nav .dropdown-submenu > a:before { 242 | margin-top: 8px; 243 | border-color: inherit; 244 | border-style: solid; 245 | border-width: 4px 4px 0; 246 | border-left-color: transparent; 247 | border-right-color: transparent; 248 | } 249 | .navbar-nav .dropdown-submenu > a { 250 | padding-left: 40px; 251 | } 252 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > a, 253 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > label { 254 | padding-left: 35px; 255 | } 256 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > a, 257 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > label { 258 | padding-left: 45px; 259 | } 260 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 261 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 262 | padding-left: 55px; 263 | } 264 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu 
> li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 265 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 266 | padding-left: 65px; 267 | } 268 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 269 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 270 | padding-left: 75px; 271 | } 272 | } 273 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 274 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 275 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 276 | background-color: #e7e7e7; 277 | color: #555555; 278 | } 279 | @media (max-width: 767px) { 280 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 281 | border-top-color: #555555; 282 | } 283 | } 284 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 285 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 286 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 287 | background-color: #080808; 288 | color: #ffffff; 289 | } 290 | @media (max-width: 767px) { 291 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 292 | border-top-color: #ffffff; 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /lab4/viskit/static/js/dropdowns-enhancement.js: -------------------------------------------------------------------------------- 1 | /* ======================================================================== 2 | * Bootstrap Dropdowns Enhancement: dropdowns-enhancement.js v3.1.1 (Beta 1) 3 | * http://behigh.github.io/bootstrap_dropdowns_enhancement/ 4 | * ======================================================================== 5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) 6 | * ======================================================================== */ 7 | 8 | (function($) { 9 | "use strict"; 10 | 11 | var toggle = '[data-toggle="dropdown"]', 12 | disabled = '.disabled, :disabled', 13 | backdrop = '.dropdown-backdrop', 14 | menuClass = 'dropdown-menu', 15 | subMenuClass = 'dropdown-submenu', 16 | namespace = '.bs.dropdown.data-api', 17 | eventNamespace = '.bs.dropdown', 18 | openClass = 'open', 19 | touchSupport = 'ontouchstart' in document.documentElement, 20 | opened; 21 | 22 | 23 | function Dropdown(element) { 24 | $(element).on('click' + eventNamespace, this.toggle) 25 | } 26 | 27 | var proto = Dropdown.prototype; 28 | 29 | proto.toggle = function(event) { 30 | var $element = $(this); 31 | 32 | if ($element.is(disabled)) return; 33 | 34 | var $parent = getParent($element); 35 | var isActive = $parent.hasClass(openClass); 36 | var isSubMenu = $parent.hasClass(subMenuClass); 37 | var menuTree = isSubMenu ? 
getSubMenuParents($parent) : null; 38 | 39 | closeOpened(event, menuTree); 40 | 41 | if (!isActive) { 42 | if (!menuTree) 43 | menuTree = [$parent]; 44 | 45 | if (touchSupport && !$parent.closest('.navbar-nav').length && !menuTree[0].find(backdrop).length) { 46 | // if mobile we use a backdrop because click events don't delegate 47 | $('<div class="dropdown-backdrop"/>
').appendTo(menuTree[0]).on('click', closeOpened) 48 | } 49 | 50 | for (var i = 0, s = menuTree.length; i < s; i++) { 51 | if (!menuTree[i].hasClass(openClass)) { 52 | menuTree[i].addClass(openClass); 53 | positioning(menuTree[i].children('.' + menuClass), menuTree[i]); 54 | } 55 | } 56 | opened = menuTree[0]; 57 | } 58 | 59 | return false; 60 | }; 61 | 62 | proto.keydown = function (e) { 63 | if (!/(38|40|27)/.test(e.keyCode)) return; 64 | 65 | var $this = $(this); 66 | 67 | e.preventDefault(); 68 | e.stopPropagation(); 69 | 70 | if ($this.is('.disabled, :disabled')) return; 71 | 72 | var $parent = getParent($this); 73 | var isActive = $parent.hasClass('open'); 74 | 75 | if (!isActive || (isActive && e.keyCode == 27)) { 76 | if (e.which == 27) $parent.find(toggle).trigger('focus'); 77 | return $this.trigger('click') 78 | } 79 | 80 | var desc = ' li:not(.divider):visible a'; 81 | var desc1 = 'li:not(.divider):visible > input:not(disabled) ~ label'; 82 | var $items = $parent.find(desc1 + ', ' + '[role="menu"]' + desc + ', [role="listbox"]' + desc); 83 | 84 | if (!$items.length) return; 85 | 86 | var index = $items.index($items.filter(':focus')); 87 | 88 | if (e.keyCode == 38 && index > 0) index--; // up 89 | if (e.keyCode == 40 && index < $items.length - 1) index++; // down 90 | if (!~index) index = 0; 91 | 92 | $items.eq(index).trigger('focus') 93 | }; 94 | 95 | proto.change = function (e) { 96 | 97 | var 98 | $parent, 99 | $menu, 100 | $toggle, 101 | selector, 102 | text = '', 103 | $items; 104 | 105 | $menu = $(this).closest('.' + menuClass); 106 | 107 | $toggle = $menu.parent().find('[data-label-placement]'); 108 | 109 | if (!$toggle || !$toggle.length) { 110 | $toggle = $menu.parent().find(toggle); 111 | } 112 | 113 | if (!$toggle || !$toggle.length || $toggle.data('placeholder') === false) 114 | return; // do nothing, no control 115 | 116 | ($toggle.data('placeholder') == undefined && $toggle.data('placeholder', $.trim($toggle.text()))); 117 | text = $.data($toggle[0], 'placeholder'); 118 | 119 | $items = $menu.find('li > input:checked'); 120 | 121 | if ($items.length) { 122 | text = []; 123 | $items.each(function () { 124 | var str = $(this).parent().find('label').eq(0), 125 | label = str.find('.data-label'); 126 | 127 | if (label.length) { 128 | var p = $('
<p></p>
'); 129 | p.append(label.clone()); 130 | str = p.html(); 131 | } 132 | else { 133 | str = str.html(); 134 | } 135 | 136 | 137 | str && text.push($.trim(str)); 138 | }); 139 | 140 | text = text.length < 4 ? text.join(', ') : text.length + ' selected'; 141 | } 142 | 143 | var caret = $toggle.find('.caret'); 144 | 145 | $toggle.html(text || ' '); 146 | if (caret.length) 147 | $toggle.append(' ') && caret.appendTo($toggle); 148 | 149 | }; 150 | 151 | function positioning($menu, $control) { 152 | if ($menu.hasClass('pull-center')) { 153 | $menu.css('margin-right', $menu.outerWidth() / -2); 154 | } 155 | 156 | if ($menu.hasClass('pull-middle')) { 157 | $menu.css('margin-top', ($menu.outerHeight() / -2) - ($control.outerHeight() / 2)); 158 | } 159 | } 160 | 161 | function closeOpened(event, menuTree) { 162 | if (opened) { 163 | 164 | if (!menuTree) { 165 | menuTree = [opened]; 166 | } 167 | 168 | var parent; 169 | 170 | if (opened[0] !== menuTree[0][0]) { 171 | parent = opened; 172 | } else { 173 | parent = menuTree[menuTree.length - 1]; 174 | if (parent.parent().hasClass(menuClass)) { 175 | parent = parent.parent(); 176 | } 177 | } 178 | 179 | parent.find('.' + openClass).removeClass(openClass); 180 | 181 | if (parent.hasClass(openClass)) 182 | parent.removeClass(openClass); 183 | 184 | if (parent === opened) { 185 | opened = null; 186 | $(backdrop).remove(); 187 | } 188 | } 189 | } 190 | 191 | function getSubMenuParents($submenu) { 192 | var result = [$submenu]; 193 | var $parent; 194 | while (!$parent || $parent.hasClass(subMenuClass)) { 195 | $parent = ($parent || $submenu).parent(); 196 | if ($parent.hasClass(menuClass)) { 197 | $parent = $parent.parent(); 198 | } 199 | if ($parent.children(toggle)) { 200 | result.unshift($parent); 201 | } 202 | } 203 | return result; 204 | } 205 | 206 | function getParent($this) { 207 | var selector = $this.attr('data-target'); 208 | 209 | if (!selector) { 210 | selector = $this.attr('href'); 211 | selector = selector && /#[A-Za-z]/.test(selector) && selector.replace(/.*(?=#[^\s]*$)/, ''); //strip for ie7 212 | } 213 | 214 | var $parent = selector && $(selector); 215 | 216 | return $parent && $parent.length ? $parent : $this.parent() 217 | } 218 | 219 | // DROPDOWN PLUGIN DEFINITION 220 | // ========================== 221 | 222 | var old = $.fn.dropdown; 223 | 224 | $.fn.dropdown = function (option) { 225 | return this.each(function () { 226 | var $this = $(this); 227 | var data = $this.data('bs.dropdown'); 228 | 229 | if (!data) $this.data('bs.dropdown', (data = new Dropdown(this))); 230 | if (typeof option == 'string') data[option].call($this); 231 | }) 232 | }; 233 | 234 | $.fn.dropdown.Constructor = Dropdown; 235 | 236 | $.fn.dropdown.clearMenus = function(e) { 237 | $(backdrop).remove(); 238 | $('.' 
+ openClass + ' ' + toggle).each(function () { 239 | var $parent = getParent($(this)); 240 | var relatedTarget = { relatedTarget: this }; 241 | if (!$parent.hasClass('open')) return; 242 | $parent.trigger(e = $.Event('hide' + eventNamespace, relatedTarget)); 243 | if (e.isDefaultPrevented()) return; 244 | $parent.removeClass('open').trigger('hidden' + eventNamespace, relatedTarget); 245 | }); 246 | return this; 247 | }; 248 | 249 | 250 | // DROPDOWN NO CONFLICT 251 | // ==================== 252 | 253 | $.fn.dropdown.noConflict = function () { 254 | $.fn.dropdown = old; 255 | return this 256 | }; 257 | 258 | 259 | $(document).off(namespace) 260 | .on('click' + namespace, closeOpened) 261 | .on('click' + namespace, toggle, proto.toggle) 262 | .on('click' + namespace, '.dropdown-menu > li > input[type="checkbox"] ~ label, .dropdown-menu > li > input[type="checkbox"], .dropdown-menu.noclose > li', function (e) { 263 | e.stopPropagation() 264 | }) 265 | .on('change' + namespace, '.dropdown-menu > li > input[type="checkbox"], .dropdown-menu > li > input[type="radio"]', proto.change) 266 | .on('keydown' + namespace, toggle + ', [role="menu"], [role="listbox"]', proto.keydown) 267 | }(jQuery)); -------------------------------------------------------------------------------- /prelab/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 
17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /prelab/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /prelab/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | dependencies: 3 | - python==3.5.3 4 | - numpy==1.13.1 5 | - notebook==5.0.0 6 | - pip: 7 | - gym==0.9.2 8 | - chainer==2.0.1 9 | - matplotlib==2.0.2 10 | -------------------------------------------------------------------------------- /prelab/findport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Usage: findport.py 3000 100 4 | # 5 | 6 | """ 7 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 8 | 9 | Copyright 2017 Deep RL Bootcamp Organizers. 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | 17 | """ 18 | 19 | 20 | from __future__ import print_function 21 | import socket 22 | from contextlib import closing 23 | import sys 24 | 25 | if len(sys.argv) != 3: 26 | print("Usage: {} ".format(sys.argv[0])) 27 | sys.exit(1) 28 | 29 | base = int(sys.argv[1]) 30 | increment = int(sys.argv[2]) 31 | 32 | 33 | def find_free_port(): 34 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 35 | for port in range(base, 65536, increment): 36 | try: 37 | s.bind(('', port)) 38 | return s.getsockname()[1] 39 | except socket.error: 40 | continue 41 | 42 | 43 | print(find_free_port()) 44 | -------------------------------------------------------------------------------- /prelab/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /prelab/prelab.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/prelab/prelab.pdf -------------------------------------------------------------------------------- /prelab/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 
36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /prelab/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | def main(): 17 | import roboschool 18 | import gym 19 | import chainer 20 | env = gym.make('CartPole-v0') 21 | env.reset() 22 | env.step(env.action_space.sample()) 23 | env = gym.make('RoboschoolHalfCheetah-v1') 24 | env.reset() 25 | env.step(env.action_space.sample()) 26 | print("Your environment has been successfully set up!") 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /prelab/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | 16 | from gym import Env 17 | from gym.envs.registration import register 18 | from gym.utils import seeding 19 | from gym import spaces 20 | from gym.envs.classic_control.cartpole import CartPoleEnv 21 | import numpy as np 22 | 23 | 24 | class PointEnv(Env): 25 | metadata = { 26 | 'render.modes': ['human', 'rgb_array'], 27 | 'video.frames_per_second': 50 28 | } 29 | 30 | def __init__(self): 31 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 32 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 33 | 34 | self._seed() 35 | self.viewer = None 36 | self.state = None 37 | 38 | def _seed(self, seed=None): 39 | self.np_random, seed = seeding.np_random(seed) 40 | return [seed] 41 | 42 | def _step(self, action): 43 | action = np.clip(action, -0.025, 0.025) 44 | self.state = np.clip(self.state + action, -1, 1) 45 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 46 | 47 | def _reset(self): 48 | while True: 49 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 50 | # Sample states that are far away 51 | if np.linalg.norm(self.state) > 0.9: 52 | break 53 | return np.array(self.state) 54 | 55 | # def _render(self, mode='human', close=False): 56 | # pass 57 | 58 | def _render(self, mode='human', close=False): 59 | if close: 60 | if self.viewer is not None: 61 | self.viewer.close() 62 | self.viewer = None 63 | return 64 | 65 | screen_width = 800 66 | screen_height = 800 67 | 68 | if self.viewer is None: 69 | from gym.envs.classic_control import rendering 70 | self.viewer = rendering.Viewer(screen_width, screen_height) 71 | 72 | agent = rendering.make_circle( 73 | min(screen_height, screen_width) * 0.03) 74 | origin = rendering.make_circle( 75 | min(screen_height, screen_width) * 0.03) 76 | trans = rendering.Transform(translation=(0, 0)) 77 | agent.add_attr(trans) 78 | self.trans = trans 79 | agent.set_color(1, 0, 0) 80 | origin.set_color(0, 0, 0) 81 | origin.add_attr(rendering.Transform( 82 | translation=(screen_width // 2, screen_height // 2))) 83 | self.viewer.add_geom(agent) 84 | self.viewer.add_geom(origin) 85 | 86 | # self.trans.set_translation(0, 0) 87 | self.trans.set_translation( 88 | (self.state[0] + 1) / 2 * screen_width, 89 | (self.state[1] + 1) / 2 * screen_height, 90 | ) 91 | 92 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 93 | 94 | 95 | register( 96 | 'Point-v0', 97 | entry_point='simplepg.point_env:PointEnv', 98 | timestep_limit=40, 99 | ) 100 | -------------------------------------------------------------------------------- /prelab/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | import click 17 | import numpy as np 18 | import gym 19 | 20 | from simplepg.simple_utils import include_bias, weighted_sample 21 | 22 | 23 | def point_get_action(theta, ob, rng=np.random): 24 | ob_1 = include_bias(ob) 25 | mean = theta.dot(ob_1) 26 | return rng.normal(loc=mean, scale=1.) 27 | 28 | 29 | def cartpole_get_action(theta, ob, rng=np.random): 30 | ob_1 = include_bias(ob) 31 | logits = ob_1.dot(theta.T) 32 | return weighted_sample(logits, rng=rng) 33 | 34 | 35 | @click.command() 36 | @click.argument("env_id", type=str, default="Point-v0") 37 | def main(env_id): 38 | # Register the environment 39 | rng = np.random.RandomState(42) 40 | 41 | if env_id == 'CartPole-v0': 42 | env = gym.make('CartPole-v0') 43 | get_action = cartpole_get_action 44 | obs_dim = env.observation_space.shape[0] 45 | action_dim = env.action_space.n 46 | elif env_id == 'Point-v0': 47 | from simplepg import point_env 48 | env = gym.make('Point-v0') 49 | get_action = point_get_action 50 | obs_dim = env.observation_space.shape[0] 51 | action_dim = env.action_space.shape[0] 52 | else: 53 | raise ValueError( 54 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 55 | 56 | env.seed(42) 57 | 58 | # Initialize parameters 59 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 60 | 61 | while True: 62 | ob = env.reset() 63 | done = False 64 | # Only render the first trajectory 65 | # Collect a new trajectory 66 | rewards = [] 67 | while not done: 68 | action = get_action(theta, ob, rng=rng) 69 | next_ob, rew, done, _ = env.step(action) 70 | ob = next_ob 71 | env.render() 72 | rewards.append(rew) 73 | 74 | print("Episode reward: %.2f" % np.sum(rewards)) 75 | 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /prelab/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | 16 | import numpy as np 17 | import scipy.special 18 | import chainer 19 | 20 | 21 | # Compute gradient approximately using finite difference 22 | def numerical_grad(f, x, eps=1e-8): 23 | grad = np.zeros_like(x) 24 | for i in range(len(x)): 25 | xplus = np.array(x) 26 | xplus[i] += eps 27 | fplus = f(xplus) 28 | xminus = np.array(x) 29 | xminus[i] -= eps 30 | fminus = f(xminus) 31 | grad[i] = (fplus - fminus) / (2 * eps) 32 | return grad 33 | 34 | 35 | def gradient_check(f, g, x): 36 | # Test the implementation of g(x) = df/dx 37 | # Perform numerical differentiation and test it 38 | g_num = numerical_grad(f, x) 39 | g_test = g(x) 40 | try: 41 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 42 | print("Gradient check passed!") 43 | except AssertionError as e: 44 | print(e) 45 | print("Error: Gradient check didn't pass!") 46 | exit() 47 | 48 | 49 | def log_softmax(logits): 50 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 51 | 52 | 53 | def softmax(logits): 54 | x = logits 55 | x = x - np.max(x, axis=-1, keepdims=True) 56 | x = np.exp(x) 57 | return x / np.sum(x, axis=-1, keepdims=True) 58 | 59 | 60 | def weighted_sample(logits, rng=np.random): 61 | weights = softmax(logits) 62 | return min( 63 | int(np.sum(rng.uniform() > np.cumsum(weights))), 64 | len(weights) - 1 65 | ) 66 | 67 | 68 | def include_bias(x): 69 | # Add a constant term (1.0) to each entry in x 70 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 71 | 72 | 73 | _tested = set() 74 | _tests = dict() 75 | 76 | nprs = np.random.RandomState 77 | 78 | 79 | def register_test(fn_name, kwargs, desired_output=None): 80 | assert fn_name not in _tests 81 | _tests[fn_name] = (kwargs, desired_output) 82 | 83 | 84 | def assert_allclose(a, b): 85 | if isinstance(a, (np.ndarray, float, int)): 86 | np.testing.assert_allclose(a, b) 87 | elif isinstance(a, (tuple, list)): 88 | assert isinstance(b, (tuple, list)) 89 | assert len(a) == len(b) 90 | for a_i, b_i in zip(a, b): 91 | assert_allclose(a_i, b_i) 92 | elif isinstance(a, chainer.Variable): 93 | assert isinstance(b, chainer.Variable) 94 | assert_allclose(a.data, b.data) 95 | else: 96 | raise NotImplementedError 97 | 98 | 99 | def test_once(fn): 100 | module = fn.__module__ 101 | name = fn.__name__ 102 | key = module + "." + name 103 | if key in _tested: 104 | return 105 | assert key in _tests, "Test for %s not found!" 
% key 106 | kwargs, desired_output = _tests[key] 107 | _tested.add(key) 108 | 109 | if callable(kwargs): 110 | kwargs = kwargs() 111 | 112 | if callable(desired_output): 113 | desired_output = desired_output() 114 | 115 | if desired_output is None: 116 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 117 | exit() 118 | else: 119 | try: 120 | output = fn(**kwargs) 121 | assert_allclose(desired_output, output) 122 | print("Test for %s passed!" % key) 123 | except AssertionError as e: 124 | print(e) 125 | print("Error: test for %s didn't pass!" % key) 126 | exit() 127 | --------------------------------------------------------------------------------
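A note on the prelab policy code above: prelab/simplepg/rollout.py builds both of its policies from the helpers in prelab/simplepg/simple_utils.py. The sketch below is not part of the original labs; the observation values are made up for illustration, and it assumes simplepg is importable (e.g. when run from the prelab directory inside the provided Docker image). It shows how include_bias, softmax, and weighted_sample combine into the discrete CartPole policy and the Gaussian Point-v0 policy.

import numpy as np

from simplepg.simple_utils import include_bias, softmax, weighted_sample

rng = np.random.RandomState(0)

# Discrete CartPole-style policy: a linear map from the bias-augmented
# observation to one logit per action, sampled through a softmax
# (this is what cartpole_get_action in rollout.py does).
obs_dim, action_dim = 4, 2
theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1))

ob = np.array([0.01, -0.02, 0.03, 0.04])   # made-up CartPole observation
ob_1 = include_bias(ob)                    # append a constant 1.0 feature
logits = ob_1.dot(theta.T)                 # one logit per action
action = weighted_sample(logits, rng=rng)  # index drawn from softmax(logits)
print("discrete action:", action, "action probabilities:", softmax(logits))

# Continuous Point-v0-style policy: the same linear map gives the mean of a
# unit-variance Gaussian over the 2-D action (point_get_action in rollout.py).
theta_pt = rng.normal(scale=0.01, size=(2, 2 + 1))
ob_pt = np.array([0.5, -0.7])              # made-up Point-v0 observation
mean = theta_pt.dot(include_bias(ob_pt))
action_pt = rng.normal(loc=mean, scale=1.0)
print("continuous action:", action_pt)

Folding the bias into theta as an extra column is why theta has shape (action_dim, obs_dim + 1) in rollout.py.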
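The utilities at the end of prelab/simplepg/simple_utils.py (numerical_grad, gradient_check, register_test, test_once) are how the lab exercises check implementations against reference outputs. Below is a minimal illustration of the intended flow, again assuming the prelab environment; square_sum, its gradient, and the registered inputs are invented for this example and do not appear in the labs.

import numpy as np

from simplepg.simple_utils import gradient_check, register_test, test_once

# A toy function and its analytic gradient, invented for illustration.
def square_sum(x):
    return float(np.sum(x ** 2))

def square_sum_grad(x):
    return 2.0 * x

# gradient_check compares the analytic gradient against the central-difference
# estimate computed by numerical_grad and prints "Gradient check passed!".
gradient_check(square_sum, square_sum_grad, np.array([1.0, -2.0, 3.0]))

# register_test stores the expected output under the module-qualified function
# name (when this file is run as a script, __module__ is "__main__"); test_once
# then calls the function with the registered kwargs and compares the result
# via assert_allclose.
register_test(
    "__main__.square_sum",
    kwargs=dict(x=np.array([1.0, 2.0])),
    desired_output=5.0,
)
test_once(square_sum)  # prints "Test for __main__.square_sum passed!"; later calls are no-ops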