├── .gitignore ├── README.md ├── lab1 ├── LICENSE ├── Lab 1 - Problem 1.ipynb ├── Lab 1 - Problem 2.ipynb ├── Lab 1 - Problem 3.ipynb ├── crawler_env.py ├── discrete_env.py ├── environment.yml ├── frozen_lake.py ├── lab1.md └── misc.py ├── lab2 ├── LICENSE ├── Lab 2.ipynb ├── environment.yml └── lab2.md ├── lab3 ├── LICENSE ├── docker_run.sh ├── docker_run_vnc.sh ├── environment.yml ├── findport.py ├── findport.sh ├── lab3.pdf ├── launch_bg_screen_buffer.sh ├── logger.py ├── scripts │ ├── setup_xquartz.sh │ └── test_environment_setup.py ├── simpledqn │ ├── __init__.py │ ├── gridworld_env.py │ ├── main.py │ ├── replay_buffer.py │ ├── replay_buffer_warm_start.pkl │ ├── simple_utils.py │ ├── weights_warm_start.pkl │ └── wrappers.py └── viskit │ ├── __init__.py │ ├── core.py │ ├── frontend.py │ ├── static │ ├── css │ │ ├── bootstrap.min.css │ │ └── dropdowns-enhancement.css │ └── js │ │ ├── bootstrap.min.js │ │ ├── dropdowns-enhancement.js │ │ ├── jquery-1.10.2.min.js │ │ ├── jquery.loadTemplate-1.5.6.js │ │ └── plotly-latest.min.js │ └── templates │ └── main.html ├── lab4 ├── LICENSE ├── a2c.py ├── alg_utils.py ├── algs.py ├── docker_run.sh ├── docker_run_vnc.sh ├── env_makers.py ├── environment.yml ├── experiments │ ├── run_a2c_breakout.py │ ├── run_a2c_pong.py │ ├── run_a2c_pong_warm_start.py │ ├── run_pg_cartpole.py │ ├── run_trpo_cartpole.py │ ├── run_trpo_half_cheetah.py │ └── run_trpo_pendulum.py ├── findport.py ├── lab4.pdf ├── launch_bg_screen_buffer.sh ├── logger.py ├── models.py ├── pg.py ├── pong_warm_start.pkl ├── scripts │ ├── resume_training.py │ ├── setup_xquartz.sh │ ├── sim_policy.py │ └── test_environment_setup.py ├── simplepg │ ├── main.py │ ├── point_env.py │ ├── rollout.py │ └── simple_utils.py ├── tests │ ├── a2c_tests.py │ ├── pg_tests.py │ ├── simplepg_tests.py │ └── trpo_tests.py ├── trpo.py ├── utils.py └── viskit │ ├── __init__.py │ ├── core.py │ ├── frontend.py │ ├── static │ ├── css │ │ ├── bootstrap.min.css │ │ └── dropdowns-enhancement.css │ └── js │ │ ├── bootstrap.min.js │ │ ├── dropdowns-enhancement.js │ │ ├── jquery-1.10.2.min.js │ │ ├── jquery.loadTemplate-1.5.6.js │ │ └── plotly-latest.min.js │ └── templates │ └── main.html └── prelab ├── docker_run.sh ├── docker_run_vnc.sh ├── environment.yml ├── findport.py ├── launch_bg_screen_buffer.sh ├── prelab.pdf ├── scripts ├── setup_xquartz.sh └── test_environment_setup.py └── simplepg ├── point_env.py ├── rollout.py └── simple_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Solutions to the [Deep RL Bootcamp](https://sites.google.com/view/deep-rl-bootcamp) labs 2 | 3 | * [Prelab](prelab): Set up your computer for all labs. 4 | * [Lab 1](lab1): Markov Decision Processes. You will implement value iteration, policy iteration, and tabular Q-learning and apply these algorithms to simple environments including tabular maze navigation (FrozenLake) and controlling a simple crawler robot. 5 | * [Lab 2](lab2): Introduction to Chainer. You will implement deep supervised learning using Chainer, and apply it to the MNIST dataset. 6 | * [Lab 3](lab3): Deep Q-Learning. You will implement the DQN algorithm and apply it to Atari games. 7 | * [Lab 4](lab4): Policy Optimization Algorithms. 
You will implement various policy optimization algorithms, including policy gradient, natural policy gradient, trust-region policy optimization (TRPO), and asynchronous advantage actor-critic (A3C). You will apply these algorithms to classic control tasks, Atari games, and roboschool locomotion environments. -------------------------------------------------------------------------------- /lab1/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Deep RL Bootcamp Organizers. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lab1/discrete_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Peter Chen, Rocky Duan, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | import numpy as np 16 | 17 | from gym import Env, spaces 18 | from gym.utils import seeding 19 | 20 | def categorical_sample(prob_n, np_random): 21 | """ 22 | Sample from categorical distribution 23 | Each row specifies class probabilities 24 | """ 25 | prob_n = np.asarray(prob_n) 26 | csprob_n = np.cumsum(prob_n) 27 | return (csprob_n > np_random.rand()).argmax() 28 | 29 | 30 | class DiscreteEnv(Env): 31 | 32 | """ 33 | Has the following members 34 | - nS: number of states 35 | - nA: number of actions 36 | - P: transitions (*) 37 | - isd: initial state distribution (**) 38 | 39 | (*) dictionary dict of dicts of lists, where 40 | P[s][a] == [(probability, nextstate, reward, done), ...] 41 | (**) list or array of length nS 42 | 43 | 44 | """ 45 | def __init__(self, nS, nA, P, isd): 46 | self.P = P 47 | self.isd = isd 48 | self.lastaction=None # for rendering 49 | self.nS = nS 50 | self.nA = nA 51 | 52 | self.action_space = spaces.Discrete(self.nA) 53 | self.observation_space = spaces.Discrete(self.nS) 54 | 55 | self._seed() 56 | self._reset() 57 | 58 | def _seed(self, seed=None): 59 | self.np_random, seed = seeding.np_random(seed) 60 | return [seed] 61 | 62 | def _reset(self): 63 | self.s = categorical_sample(self.isd, self.np_random) 64 | self.lastaction=None 65 | return self.s 66 | 67 | def _step(self, a): 68 | transitions = self.P[self.s][a] 69 | i = categorical_sample([t[0] for t in transitions], self.np_random) 70 | p, s, r, d= transitions[i] 71 | self.s = s 72 | self.lastaction=a 73 | return (s, r, d, {"prob" : p}) 74 | -------------------------------------------------------------------------------- /lab1/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - notebook 11 | - pip: 12 | - gym==0.9.2 13 | - chainer==2.0.1 14 | - ipdb==0.10.3 15 | - tblib==1.3.2 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | - matplotlib 33 | -------------------------------------------------------------------------------- /lab1/frozen_lake.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Peter Chen, Rocky Duan, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from Berkeley Deep RL Class [HW2](https://github.com/berkeleydeeprlcourse/homework/blob/c1027d83cd542e67ebed982d44666e0d22a00141/hw2/HW2.ipynb) [(license)](https://github.com/berkeleydeeprlcourse/homework/blob/master/LICENSE). 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import sys 19 | from six import StringIO, b 20 | 21 | from gym import utils 22 | import discrete_env 23 | 24 | LEFT = 0 25 | DOWN = 1 26 | RIGHT = 2 27 | UP = 3 28 | 29 | MAPS = { 30 | "4x4": [ 31 | "SFFF", 32 | "FHFH", 33 | "FFFH", 34 | "HFFG" 35 | ], 36 | "8x8": [ 37 | "SFFFFFFF", 38 | "FFFFFFFF", 39 | "FFFHFFFF", 40 | "FFFFFHFF", 41 | "FFFHFFFF", 42 | "FHHFFFHF", 43 | "FHFFHFHF", 44 | "FFFHFFFG" 45 | ], 46 | } 47 | 48 | class FrozenLakeEnv(discrete_env.DiscreteEnv): 49 | """ 50 | Winter is here. You and your friends were tossing around a frisbee at the park 51 | when you made a wild throw that left the frisbee out in the middle of the lake. 52 | The water is mostly frozen, but there are a few holes where the ice has melted. 53 | If you step into one of those holes, you'll fall into the freezing water. 54 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 55 | you navigate across the lake and retrieve the disc. 56 | However, the ice is slippery, so you won't always move in the direction you intend. 57 | The surface is described using a grid like the following 58 | 59 | SFFF 60 | FHFH 61 | FFFH 62 | HFFG 63 | 64 | S : starting point, safe 65 | F : frozen surface, safe 66 | H : hole, fall to your doom 67 | G : goal, where the frisbee is located 68 | 69 | The episode ends when you reach the goal or fall in a hole. 70 | You receive a reward of 1 if you reach the goal, and zero otherwise. 
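    When is_slippery is True, the intended action is taken with probability 0.8
    and each of the two perpendicular directions with probability 0.1 (see the
    transition construction in __init__ below).

    Minimal usage sketch (illustrative only, not part of the original lab code;
    assumes the Gym 0.9.x API pinned in environment.yml):

        env = FrozenLakeEnv(map_name="4x4", is_slippery=True)
        s = env.reset()                      # integer state index in [0, nS)
        s, r, done, info = env.step(RIGHT)   # info["prob"] is the probability of the sampled transition
        model = env.P                        # P[s][a] = [(prob, next_state, reward, done), ...],
                                             # the tabular model used for value/policy iteration in Lab 1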
71 | 72 | """ 73 | 74 | metadata = {'render.modes': ['human', 'ansi']} 75 | 76 | def __init__(self, desc=None, map_name="4x4",is_slippery=True): 77 | if desc is None and map_name is None: 78 | raise ValueError('Must provide either desc or map_name') 79 | elif desc is None: 80 | desc = MAPS[map_name] 81 | self.desc = desc = np.asarray(desc,dtype='c') 82 | self.nrow, self.ncol = nrow, ncol = desc.shape 83 | 84 | nA = 4 85 | nS = nrow * ncol 86 | 87 | isd = np.array(desc == b'S').astype('float64').ravel() 88 | isd /= isd.sum() 89 | 90 | P = {s : {a : [] for a in range(nA)} for s in range(nS)} 91 | 92 | def to_s(row, col): 93 | return row*ncol + col 94 | def inc(row, col, a): 95 | if a==0: # left 96 | col = max(col-1,0) 97 | elif a==1: # down 98 | row = min(row+1,nrow-1) 99 | elif a==2: # right 100 | col = min(col+1,ncol-1) 101 | elif a==3: # up 102 | row = max(row-1,0) 103 | return (row, col) 104 | 105 | for row in range(nrow): 106 | for col in range(ncol): 107 | s = to_s(row, col) 108 | for a in range(4): 109 | li = P[s][a] 110 | letter = desc[row, col] 111 | if letter in b'GH': 112 | li.append((1.0, s, 0, True)) 113 | else: 114 | if is_slippery: 115 | for b in [(a-1)%4, a, (a+1)%4]: 116 | newrow, newcol = inc(row, col, b) 117 | newstate = to_s(newrow, newcol) 118 | newletter = desc[newrow, newcol] 119 | done = bytes(newletter) in b'GH' 120 | rew = float(newletter == b'G') 121 | li.append((0.8 if b==a else 0.1, newstate, rew, done)) 122 | else: 123 | newrow, newcol = inc(row, col, a) 124 | newstate = to_s(newrow, newcol) 125 | newletter = desc[newrow, newcol] 126 | done = bytes(newletter) in b'GH' 127 | rew = float(newletter == b'G') 128 | li.append((1.0, newstate, rew, done)) 129 | 130 | super(FrozenLakeEnv, self).__init__(nS, nA, P, isd) 131 | 132 | def _render(self, mode='human', close=False): 133 | if close: 134 | return 135 | outfile = StringIO() if mode == 'ansi' else sys.stdout 136 | 137 | row, col = self.s // self.ncol, self.s % self.ncol 138 | desc = self.desc.tolist() 139 | desc = [[c.decode('utf-8') for c in line] for line in desc] 140 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 141 | if self.lastaction is not None: 142 | outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction])) 143 | else: 144 | outfile.write("\n") 145 | outfile.write("\n".join(''.join(line) for line in desc)+"\n") 146 | 147 | return outfile 148 | -------------------------------------------------------------------------------- /lab1/lab1.md: -------------------------------------------------------------------------------- 1 | * Activate the conda environment by running 2 | ``` 3 | source activate deeprlbootcamp 4 | ``` 5 | * Launch IPython Notebook from this directory; this should open up a browser window where you can click to open Lab1. 6 | ``` 7 | jupyter notebook 8 | ``` 9 | * After opening a lab file, click “File - Trust Notebook” 10 | * If you have never used IPython Notebook before, skim this quick tutorial here: http://cs231n.github.io/ipython-tutorial/ 11 | -------------------------------------------------------------------------------- /lab1/misc.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Peter Chen, Rocky Duan, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 
3 | 4 | Code adapted from Berkeley Deep RL Class [HW2](https://github.com/berkeleydeeprlcourse/homework/blob/c1027d83cd542e67ebed982d44666e0d22a00141/hw2/HW2.ipynb) [(license)](https://github.com/berkeleydeeprlcourse/homework/blob/master/LICENSE). 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import sys 19 | from six import StringIO, b 20 | 21 | from gym import utils 22 | import discrete_env 23 | 24 | LEFT = 0 25 | DOWN = 1 26 | RIGHT = 2 27 | UP = 3 28 | 29 | MAPS = { 30 | "4x4": [ 31 | "SFFF", 32 | "FHFH", 33 | "FFFH", 34 | "HFFG" 35 | ], 36 | "8x8": [ 37 | "SFFFFFFF", 38 | "FFFFFFFF", 39 | "FFFHFFFF", 40 | "FFFFFHFF", 41 | "FFFHFFFF", 42 | "FHHFFFHF", 43 | "FHFFHFHF", 44 | "FFFHFFFG" 45 | ], 46 | } 47 | 48 | class FrozenLakeEnv(discrete_env.DiscreteEnv): 49 | """ 50 | Winter is here. You and your friends were tossing around a frisbee at the park 51 | when you made a wild throw that left the frisbee out in the middle of the lake. 52 | The water is mostly frozen, but there are a few holes where the ice has melted. 53 | If you step into one of those holes, you'll fall into the freezing water. 54 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 55 | you navigate across the lake and retrieve the disc. 56 | However, the ice is slippery, so you won't always move in the direction you intend. 57 | The surface is described using a grid like the following 58 | 59 | SFFF 60 | FHFH 61 | FFFH 62 | HFFG 63 | 64 | S : starting point, safe 65 | F : frozen surface, safe 66 | H : hole, fall to your doom 67 | G : goal, where the frisbee is located 68 | 69 | The episode ends when you reach the goal or fall in a hole. 70 | You receive a reward of 1 if you reach the goal, and zero otherwise. 
71 | 72 | """ 73 | 74 | metadata = {'render.modes': ['human', 'ansi']} 75 | 76 | def __init__(self, desc=None, map_name="4x4",is_slippery=True): 77 | if desc is None and map_name is None: 78 | raise ValueError('Must provide either desc or map_name') 79 | elif desc is None: 80 | desc = MAPS[map_name] 81 | self.desc = desc = np.asarray(desc,dtype='c') 82 | self.nrow, self.ncol = nrow, ncol = desc.shape 83 | 84 | nA = 4 85 | nS = nrow * ncol 86 | 87 | isd = np.array(desc == b'S').astype('float64').ravel() 88 | isd /= isd.sum() 89 | 90 | P = {s : {a : [] for a in range(nA)} for s in range(nS)} 91 | 92 | def to_s(row, col): 93 | return row*ncol + col 94 | def inc(row, col, a): 95 | if a==0: # left 96 | col = max(col-1,0) 97 | elif a==1: # down 98 | row = min(row+1,nrow-1) 99 | elif a==2: # right 100 | col = min(col+1,ncol-1) 101 | elif a==3: # up 102 | row = max(row-1,0) 103 | return (row, col) 104 | 105 | for row in range(nrow): 106 | for col in range(ncol): 107 | s = to_s(row, col) 108 | for a in range(4): 109 | li = P[s][a] 110 | letter = desc[row, col] 111 | if letter in b'GH': 112 | li.append((1.0, s, 0, True)) 113 | else: 114 | if is_slippery: 115 | for b in [(a-1)%4, a, (a+1)%4]: 116 | newrow, newcol = inc(row, col, b) 117 | newstate = to_s(newrow, newcol) 118 | newletter = desc[newrow, newcol] 119 | done = bytes(newletter) in b'GH' 120 | rew = float(newletter == b'G') 121 | li.append((0.8 if b==a else 0.1, newstate, rew, done)) 122 | else: 123 | newrow, newcol = inc(row, col, a) 124 | newstate = to_s(newrow, newcol) 125 | newletter = desc[newrow, newcol] 126 | done = bytes(newletter) in b'GH' 127 | rew = float(newletter == b'G') 128 | li.append((1.0, newstate, rew, done)) 129 | 130 | super(FrozenLakeEnv, self).__init__(nS, nA, P, isd) 131 | 132 | def _render(self, mode='human', close=False): 133 | if close: 134 | return 135 | outfile = StringIO() if mode == 'ansi' else sys.stdout 136 | 137 | row, col = self.s // self.ncol, self.s % self.ncol 138 | desc = self.desc.tolist() 139 | desc = [[c.decode('utf-8') for c in line] for line in desc] 140 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 141 | if self.lastaction is not None: 142 | outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction])) 143 | else: 144 | outfile.write("\n") 145 | outfile.write("\n".join(''.join(line) for line in desc)+"\n") 146 | 147 | return outfile 148 | 149 | def make_grader(expected): 150 | boxed_i = [0] 151 | boxed_err = [False] 152 | expected_lines = expected.split("\n") 153 | def checking_print(line): 154 | if boxed_i[0] < len(expected_lines): 155 | expected_line = expected_lines[boxed_i[0]] 156 | else: 157 | expected_line = "[END]" 158 | if expected_line == line: 159 | print(line) 160 | else: 161 | boxed_err[0] = True 162 | print("\x1b[41m", end="") 163 | print(line, end="") 164 | print("\x1b[0m", end="") 165 | print(" *** Expected: \x1b[42m" + expected_line + "\x1b[0m") 166 | boxed_i[0] += 1 167 | if boxed_i[0] == len(expected_lines): 168 | print("Test failed" if boxed_err[0] else "Test succeeded") 169 | return checking_print 170 | -------------------------------------------------------------------------------- /lab2/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Deep RL Bootcamp Organizers. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lab2/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - notebook 11 | - pip: 12 | - gym==0.9.2 13 | - chainer==2.0.1 14 | - ipdb==0.10.3 15 | - tblib==1.3.2 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | - matplotlib 33 | -------------------------------------------------------------------------------- /lab2/lab2.md: -------------------------------------------------------------------------------- 1 | * Activate the conda environment by running 2 | ``` 3 | source activate deeprlbootcamp 4 | ``` 5 | * Launch IPython Notebook from this directory; this should open up a browser window where you can click to open Lab2. 6 | ``` 7 | jupyter notebook 8 | ``` 9 | * After opening a lab file, click “File - Trust Notebook” 10 | * If you have never used IPython Notebook before, skim this quick tutorial here: http://cs231n.github.io/ipython-tutorial/ 11 | -------------------------------------------------------------------------------- /lab3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Deep RL Bootcamp Organizers. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lab3/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /lab3/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.sh" 3000 1) 4 | viskit_port=$("$DIR/findport.sh" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /lab3/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /lab3/findport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python 2 | # 3 | # Usage: findport.py 3000 100 4 | # 5 | 6 | """ 7 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 8 | 9 | Copyright 2017 Deep RL Bootcamp Organizers. 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | 17 | """ 18 | 19 | 20 | from __future__ import print_function 21 | import socket 22 | from contextlib import closing 23 | import sys 24 | 25 | if len(sys.argv) != 3: 26 | print("Usage: {} ".format(sys.argv[0])) 27 | sys.exit(1) 28 | 29 | base = int(sys.argv[1]) 30 | increment = int(sys.argv[2]) 31 | 32 | 33 | def find_free_port(): 34 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 35 | for port in range(base, 65536, increment): 36 | try: 37 | s.bind(('', port)) 38 | return s.getsockname()[1] 39 | except socket.error: 40 | continue 41 | 42 | 43 | print(find_free_port()) 44 | -------------------------------------------------------------------------------- /lab3/findport.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Please run as root. 4 | # Usage: bash findport.sh 3000 100 5 | # 6 | 7 | 8 | if [[ -z "$1" || -z "$2" ]]; then 9 | echo "Usage: $0 " 10 | exit 1 11 | fi 12 | 13 | 14 | BASE=$1 15 | INCREMENT=$2 16 | 17 | port=$BASE 18 | isfree=$(netstat -aln | grep $port) 19 | 20 | while [[ -n "$isfree" ]]; do 21 | port=$[port+INCREMENT] 22 | isfree=$(netstat -aln | grep $port) 23 | done 24 | 25 | echo "$port" 26 | exit 0 27 | -------------------------------------------------------------------------------- /lab3/lab3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/lab3.pdf -------------------------------------------------------------------------------- /lab3/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 
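# Xvfb above provides a virtual framebuffer on display :99 so environments can
# render headlessly inside the container; x11vnc (started below) shares that
# display on port 5900, which docker_run_vnc.sh maps to a free host port and
# protects with the VNC password 3284.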
14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /lab3/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /lab3/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | def main(): 18 | import roboschool 19 | import gym 20 | import chainer 21 | env = gym.make('CartPole-v0') 22 | env.reset() 23 | env.step(env.action_space.sample()) 24 | env = gym.make('RoboschoolHalfCheetah-v1') 25 | env.reset() 26 | env.step(env.action_space.sample()) 27 | print("Your environment has been successfully set up!") 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /lab3/simpledqn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/simpledqn/__init__.py -------------------------------------------------------------------------------- /lab3/simpledqn/gridworld_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import sys 19 | from six import StringIO, b 20 | 21 | from gym import utils 22 | from gym.envs.toy_text import discrete 23 | from gym.envs.registration import register 24 | 25 | LEFT = 0 26 | DOWN = 1 27 | RIGHT = 2 28 | UP = 3 29 | 30 | MAPS = { 31 | "4x4": [ 32 | "SFFF", 33 | "FFFH", 34 | "FFFF", 35 | "HFFG" 36 | ], 37 | "8x8": [ 38 | "SFFFFFFF", 39 | "FFFFFFFF", 40 | "FFFHFFFF", 41 | "FFFFFHFF", 42 | "FFFHFFFF", 43 | "FHHFFFHF", 44 | "FHFFHFHF", 45 | "FFFHFFFG" 46 | ], 47 | "9x9": [ 48 | "HFFFFFFFH", 49 | "FFFFFFFFF", 50 | "FFFFFFFFF", 51 | "FFFFFFFFF", 52 | "FFFFSFFFF", 53 | "FFFFFFFFF", 54 | "FFFFFFFFF", 55 | "FFFFFFFFF", 56 | "HFFFFFFFH" 57 | ] 58 | } 59 | 60 | 61 | def to_one_hot(x, len): 62 | one_hot = np.zeros(len) 63 | one_hot[x] = 1 64 | return one_hot 65 | 66 | 67 | class GridWorld(discrete.DiscreteEnv): 68 | """ 69 | Winter is here. 
You and your friends were tossing around a frisbee at the park 70 | when you made a wild throw that left the frisbee out in the middle of the lake. 71 | The water is mostly frozen, but there are a few holes where the ice has melted. 72 | If you step into one of those holes, you'll fall into the freezing water. 73 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 74 | you navigate across the lake and retrieve the disc. 75 | However, the ice is slippery, so you won't always move in the direction you intend. 76 | The surface is described using a grid like the following 77 | 78 | SFFF 79 | FHFH 80 | FFFH 81 | HFFG 82 | 83 | S : starting point, safe 84 | F : frozen surface, safe 85 | H : hole, fall to your doom 86 | G : goal, where the frisbee is located 87 | 88 | The episode ends when you reach the goal or fall in a hole. 89 | You receive a reward of 1 if you reach the goal, and zero otherwise. 90 | 91 | """ 92 | 93 | metadata = {'render.modes': ['human', 'ansi']} 94 | 95 | def __init__(self, desc=None, map_name="4x4", is_slippery=False): 96 | if desc is None and map_name is None: 97 | raise ValueError('Must provide either desc or map_name') 98 | elif desc is None: 99 | desc = MAPS[map_name] 100 | self.desc = desc = np.asarray(desc, dtype='c') 101 | self.nrow, self.ncol = nrow, ncol = desc.shape 102 | 103 | nA = 4 104 | nS = nrow * ncol 105 | 106 | isd = np.array(desc == b'S').astype('float64').ravel() 107 | isd /= isd.sum() 108 | 109 | P = {s: {a: [] for a in range(nA)} for s in range(nS)} 110 | 111 | def to_s(row, col): 112 | return row * ncol + col 113 | 114 | def inc(row, col, a): 115 | if a == 0: # left 116 | col = max(col - 1, 0) 117 | elif a == 1: # down 118 | row = min(row + 1, nrow - 1) 119 | elif a == 2: # right 120 | col = min(col + 1, ncol - 1) 121 | elif a == 3: # up 122 | row = max(row - 1, 0) 123 | return (row, col) 124 | 125 | for row in range(nrow): 126 | for col in range(ncol): 127 | s = to_s(row, col) 128 | for a in range(4): 129 | li = P[s][a] 130 | letter = desc[row, col] 131 | if letter in b'GH': 132 | li.append((1.0, s, 0, True)) 133 | else: 134 | if is_slippery: 135 | for b in [(a - 1) % 4, a, (a + 1) % 4]: 136 | newrow, newcol = inc(row, col, b) 137 | newstate = to_s(newrow, newcol) 138 | newletter = desc[newrow, newcol] 139 | done = bytes(newletter) in b'GH' 140 | if newletter == b'G': 141 | rew = 1.0 142 | elif newletter == b'H': 143 | rew = .0 144 | else: 145 | rew = 0. 146 | # rew = float(newletter == b'G') 147 | li.append((1.0 / 3.0, newstate, rew, done)) 148 | else: 149 | newrow, newcol = inc(row, col, a) 150 | newstate = to_s(newrow, newcol) 151 | newletter = desc[newrow, newcol] 152 | done = bytes(newletter) in b'GH' 153 | # rew = float(newletter == b'G') 154 | if newletter == b'G': 155 | rew = 1.0 156 | elif newletter == b'H': 157 | rew = 0. 158 | else: 159 | rew = 0. 
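                            # non-slippery case: a single deterministic transition
                            # with probability 1.0; only reaching G yields reward 1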
160 | li.append((1.0, newstate, rew, done)) 161 | 162 | super(GridWorld, self).__init__(nS, nA, P, isd) 163 | 164 | def _reset(self): 165 | s = super(GridWorld, self)._reset() 166 | return to_one_hot(s, self.nS) 167 | 168 | def _step(self, a): 169 | s, r, d, p = super(GridWorld, self)._step(a) 170 | return to_one_hot(s, self.nS), r, d, p 171 | 172 | def print_obs(self, obs): 173 | import copy 174 | map = copy.deepcopy(self.desc).astype(str) 175 | _obs = int(np.where(obs == 1)[0][0]) 176 | map[_obs // 9, _obs % 9] = 'X' 177 | for row in map: 178 | print(row) 179 | 180 | def _render(self, mode='human', close=False): 181 | if close: 182 | return 183 | outfile = StringIO() if mode == 'ansi' else sys.stdout 184 | 185 | row, col = self.s // self.ncol, self.s % self.ncol 186 | desc = self.desc.tolist() 187 | desc = [[c.decode('utf-8') for c in line] for line in desc] 188 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 189 | if self.lastaction is not None: 190 | outfile.write(" ({})\n".format( 191 | ["Left", "Down", "Right", "Up"][self.lastaction])) 192 | else: 193 | outfile.write("\n") 194 | outfile.write("\n".join(''.join(line) for line in desc) + "\n") 195 | 196 | if mode != 'human': 197 | return outfile 198 | 199 | 200 | register( 201 | 'GridWorld-v0', 202 | entry_point='simpledqn.gridworld_env:GridWorld', 203 | timestep_limit=40, 204 | ) 205 | -------------------------------------------------------------------------------- /lab3/simpledqn/replay_buffer.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import random 19 | import pickle 20 | 21 | 22 | class ReplayBuffer(object): 23 | def __init__(self, max_size): 24 | """Simple replay buffer for storing sampled DQN (s, a, s', r) transitions as tuples. 25 | 26 | :param size: Maximum size of the replay buffer. 
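        Usage sketch (illustrative only, not from the original lab code), where
        obs_t, act, rew, obs_tp1, done describe a single sampled transition:

            buf = ReplayBuffer(max_size=10000)
            buf.add(obs_t, act, rew, obs_tp1, done)
            obs_b, act_b, rew_b, obs_tp1_b, done_b = buf.sample(batch_size=32)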
27 | """ 28 | self._buffer = [] 29 | self._max_size = max_size 30 | self._idx = 0 31 | 32 | def __len__(self): 33 | return len(self._buffer) 34 | 35 | def add(self, obs_t, act, rew, obs_tp1, done): 36 | """ 37 | Add a new sample to the replay buffer. 38 | :param obs_t: observation at time t 39 | :param act: action 40 | :param rew: reward 41 | :param obs_tp1: observation at time t+1 42 | :param done: termination signal (whether episode has finished or not) 43 | """ 44 | data = (obs_t, act, rew, obs_tp1, done) 45 | if self._idx >= len(self._buffer): 46 | self._buffer.append(data) 47 | else: 48 | self._buffer[self._idx] = data 49 | self._idx = (self._idx + 1) % self._max_size 50 | 51 | def _encode_sample(self, idxes): 52 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], [] 53 | for i in idxes: 54 | data = self._buffer[i] 55 | obs_t, action, reward, obs_tp1, done = data 56 | obses_t.append(np.array(obs_t, copy=False)) 57 | actions.append(np.array(action, copy=False)) 58 | rewards.append(reward) 59 | obses_tp1.append(np.array(obs_tp1, copy=False)) 60 | dones.append(done) 61 | return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones) 62 | 63 | def sample(self, batch_size): 64 | """Sample a batch of transition tuples. 65 | 66 | :param batch_size: Number of sampled transition tuples. 67 | :return: Tuple of transitions. 68 | """ 69 | idxes = [random.randint(0, len(self._buffer) - 1) 70 | for _ in range(batch_size)] 71 | return self._encode_sample(idxes) 72 | 73 | def dump(self, file_path=None): 74 | """Dump the replay buffer into a file. 75 | """ 76 | file = open(file_path, 'wb') 77 | pickle.dump(self._buffer, file, -1) 78 | file.close() 79 | 80 | def load(self, file_path=None): 81 | """Load the replay buffer from a file 82 | """ 83 | file = open(file_path, 'rb') 84 | self._buffer = pickle.load(file) 85 | file.close() 86 | -------------------------------------------------------------------------------- /lab3/simpledqn/replay_buffer_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/simpledqn/replay_buffer_warm_start.pkl -------------------------------------------------------------------------------- /lab3/simpledqn/simple_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import numpy as np 18 | import scipy.special 19 | import chainer 20 | 21 | 22 | # Compute gradient approximately using finite difference 23 | def numerical_grad(f, x, eps=1e-8): 24 | grad = np.zeros_like(x) 25 | for i in range(len(x)): 26 | xplus = np.array(x) 27 | xplus[i] += eps 28 | fplus = f(xplus) 29 | xminus = np.array(x) 30 | xminus[i] -= eps 31 | fminus = f(xminus) 32 | grad[i] = (fplus - fminus) / (2 * eps) 33 | return grad 34 | 35 | 36 | def gradient_check(f, g, x): 37 | # Test the implementation of g(x) = df/dx 38 | # Perform numerical differentiation and test it 39 | g_num = numerical_grad(f, x) 40 | g_test = g(x) 41 | try: 42 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 43 | print("Gradient check passed!") 44 | except AssertionError as e: 45 | print(e) 46 | print("Warning: Gradient check didn't pass!") 47 | 48 | 49 | def log_softmax(logits): 50 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 51 | 52 | 53 | def softmax(logits): 54 | x = logits 55 | x = x - np.max(x, axis=-1, keepdims=True) 56 | x = np.exp(x) 57 | return x / np.sum(x, axis=-1, keepdims=True) 58 | 59 | 60 | def weighted_sample(logits, rng=np.random): 61 | weights = softmax(logits) 62 | return min( 63 | int(np.sum(rng.uniform() > np.cumsum(weights))), 64 | len(weights) - 1 65 | ) 66 | 67 | 68 | def include_bias(x): 69 | # Add a constant term (1.0) to each entry in x 70 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 71 | 72 | 73 | _tested = set() 74 | 75 | nprs = np.random.RandomState 76 | 77 | 78 | def assert_allclose(a, b): 79 | if isinstance(a, (np.ndarray, float, int)): 80 | np.testing.assert_allclose(a, b) 81 | elif isinstance(a, (tuple, list)): 82 | assert isinstance(b, (tuple, list)) 83 | assert len(a) == len(b) 84 | for a_i, b_i in zip(a, b): 85 | assert_allclose(a_i, b_i) 86 | elif isinstance(a, chainer.Variable): 87 | assert isinstance(b, chainer.Variable) 88 | assert_allclose(a.data, b.data) 89 | else: 90 | raise NotImplementedError 91 | 92 | 93 | def test_once(fn, kwargs, desired_output=None): 94 | if fn.__name__ in _tested: 95 | return 96 | _tested.add(fn.__name__) 97 | 98 | if callable(kwargs): 99 | kwargs = kwargs() 100 | 101 | if callable(desired_output): 102 | desired_output = desired_output() 103 | 104 | if desired_output is None: 105 | print("Desired output for %s:" % (fn.__name__), repr(fn(**kwargs))) 106 | exit() 107 | else: 108 | try: 109 | output = fn(**kwargs) 110 | assert_allclose(desired_output, output) 111 | print("Test for %s passed!" % (fn.__name__)) 112 | except AssertionError as e: 113 | print(e) 114 | print("Warning: test for %s didn't pass!" 
% (fn.__name__)) 115 | -------------------------------------------------------------------------------- /lab3/simpledqn/weights_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/simpledqn/weights_warm_start.pkl -------------------------------------------------------------------------------- /lab3/simpledqn/wrappers.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import gym 18 | import numpy as np 19 | 20 | 21 | class NoopResetEnv(gym.Wrapper): 22 | def __init__(self, env=None, noop_max=30): 23 | """Sample initial states by taking random number of no-ops on reset. 24 | No-op is assumed to be action 0. 25 | """ 26 | super(NoopResetEnv, self).__init__(env) 27 | self.noop_max = noop_max 28 | self.override_num_noops = None 29 | assert env.unwrapped.get_action_meanings()[0] == 'NOOP' 30 | 31 | def _reset(self): 32 | """ Do no-op action for a number of steps in [1, noop_max].""" 33 | self.env.reset() 34 | if self.override_num_noops is not None: 35 | noops = self.override_num_noops 36 | else: 37 | noops = np.random.randint(1, self.noop_max + 1) 38 | assert noops > 0 39 | obs = None 40 | for _ in range(noops): 41 | obs, _, done, _ = self.env.step(0) 42 | if done: 43 | obs = self.env.reset() 44 | return obs 45 | 46 | 47 | class EpisodicLifeEnv(gym.Wrapper): 48 | def __init__(self, env=None): 49 | """Make end-of-life == end-of-episode, but only reset on true game over. 50 | Done by DeepMind for the DQN and co. since it helps value estimation. 
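        Illustrative wrapping order (an assumption, not prescribed by this file;
        the Atari env id is only an example):

            env = EpisodicLifeEnv(NoopResetEnv(gym.make('PongNoFrameskip-v4')))

        Lost lives then terminate training episodes, while the emulator is only
        fully reset once was_real_done indicates a real game over.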
51 | """ 52 | super(EpisodicLifeEnv, self).__init__(env) 53 | self.lives = 0 54 | self.was_real_done = True 55 | self.was_real_reset = False 56 | 57 | def _step(self, action): 58 | obs, reward, done, info = self.env.step(action) 59 | self.was_real_done = done 60 | # check current lives, make loss of life terminal, 61 | # then update lives to handle bonus lives 62 | lives = self.env.unwrapped.ale.lives() 63 | if lives < self.lives and lives > 0: 64 | # for Qbert somtimes we stay in lives == 0 condtion for a few frames 65 | # so its important to keep lives > 0, so that we only reset once 66 | # the environment advertises done. 67 | done = True 68 | self.lives = lives 69 | return obs, reward, done, info 70 | 71 | def _reset(self): 72 | """Reset only when lives are exhausted. 73 | This way all states are still reachable even though lives are episodic, 74 | and the learner need not know about any of this behind-the-scenes. 75 | """ 76 | if self.was_real_done: 77 | obs = self.env.reset() 78 | self.was_real_reset = True 79 | else: 80 | # no-op step to advance from terminal/lost life state 81 | obs, _, _, _ = self.env.step(0) 82 | self.was_real_reset = False 83 | self.lives = self.env.unwrapped.ale.lives() 84 | return obs 85 | -------------------------------------------------------------------------------- /lab3/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab3/viskit/__init__.py -------------------------------------------------------------------------------- /lab3/viskit/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rein Houthooft, Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | import csv 16 | import itertools 17 | import json 18 | import os 19 | 20 | import numpy as np 21 | 22 | 23 | # from sandbox.rocky.utils.py_utils import AttrDict 24 | 25 | class AttrDict(dict): 26 | def __init__(self, *args, **kwargs): 27 | super(AttrDict, self).__init__(*args, **kwargs) 28 | self.__dict__ = self 29 | 30 | 31 | def unique(l): 32 | return list(set(l)) 33 | 34 | 35 | def flatten(l): 36 | return [item for sublist in l for item in sublist] 37 | 38 | 39 | def load_progress(progress_json_path, verbose=True): 40 | if verbose: 41 | print("Reading %s" % progress_json_path) 42 | entries = dict() 43 | rows = [] 44 | with open(progress_json_path, 'r') as f: 45 | lines = f.read().split('\n') 46 | for line in lines: 47 | if len(line) > 0: 48 | row = json.loads(line) 49 | rows.append(row) 50 | all_keys = set(k for row in rows for k in row.keys()) 51 | for k in all_keys: 52 | if k not in entries: 53 | entries[k] = [] 54 | for row in rows: 55 | if k in row: 56 | v = row[k] 57 | try: 58 | entries[k].append(float(v)) 59 | except: 60 | entries[k].append(np.nan) 61 | else: 62 | entries[k].append(np.nan) 63 | 64 | # entries[key] = [row.get(key, np.nan) for row in rows] 65 | # added_keys = set() 66 | # for k, v in row.items(): 67 | # if k not in entries: 68 | # entries[k] = [] 69 | # try: 70 | # entries[k].append(float(v)) 71 | # except: 72 | # entries[k].append(0.) 73 | # added_keys.add(k) 74 | # for k in entries.keys(): 75 | # if k not in added_keys: 76 | # entries[k].append(np.nan) 77 | entries = dict([(k, np.array(v)) for k, v in entries.items()]) 78 | return entries 79 | 80 | 81 | def flatten_dict(d): 82 | flat_params = dict() 83 | for k, v in d.items(): 84 | if isinstance(v, dict): 85 | v = flatten_dict(v) 86 | for subk, subv in flatten_dict(v).items(): 87 | flat_params[k + "." 
+ subk] = subv 88 | else: 89 | flat_params[k] = v 90 | return flat_params 91 | 92 | 93 | def load_params(params_json_path): 94 | with open(params_json_path, 'r') as f: 95 | data = json.loads(f.read()) 96 | if "args_data" in data: 97 | del data["args_data"] 98 | if "exp_name" not in data: 99 | data["exp_name"] = params_json_path.split("/")[-2] 100 | return data 101 | 102 | 103 | def lookup(d, keys): 104 | if not isinstance(keys, list): 105 | keys = keys.split(".") 106 | for k in keys: 107 | if hasattr(d, "__getitem__"): 108 | if k in d: 109 | d = d[k] 110 | else: 111 | return None 112 | else: 113 | return None 114 | return d 115 | 116 | 117 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True): 118 | if isinstance(exp_folder_paths, str): 119 | exp_folder_paths = [exp_folder_paths] 120 | exps = [] 121 | for exp_folder_path in exp_folder_paths: 122 | exps += [x[0] for x in os.walk(exp_folder_path)] 123 | if verbose: 124 | print("finished walking exp folders") 125 | exps_data = [] 126 | for exp in exps: 127 | try: 128 | exp_path = exp 129 | variant_json_path = os.path.join(exp_path, "variant.json") 130 | progress_json_path = os.path.join(exp_path, "progress.json") 131 | progress = load_progress(progress_json_path, verbose=verbose) 132 | try: 133 | params = load_params(variant_json_path) 134 | except IOError: 135 | params = dict(exp_name="experiment") 136 | exps_data.append(AttrDict( 137 | progress=progress, params=params, flat_params=flatten_dict(params))) 138 | except IOError as e: 139 | if verbose: 140 | print(e) 141 | 142 | # a dictionary of all keys and types of values 143 | all_keys = dict() 144 | for data in exps_data: 145 | for key in data.flat_params.keys(): 146 | if key not in all_keys: 147 | all_keys[key] = type(data.flat_params[key]) 148 | 149 | # if any data does not have some key, specify the value of it 150 | if not ignore_missing_keys: 151 | default_values = dict() 152 | for data in exps_data: 153 | for key in sorted(all_keys.keys()): 154 | if key not in data.flat_params: 155 | if key not in default_values: 156 | default = None 157 | default_values[key] = default 158 | data.flat_params[key] = default_values[key] 159 | 160 | return exps_data 161 | 162 | 163 | def smart_repr(x): 164 | if isinstance(x, tuple): 165 | if len(x) == 0: 166 | return "tuple()" 167 | elif len(x) == 1: 168 | return "(%s,)" % smart_repr(x[0]) 169 | else: 170 | return "(" + ",".join(map(smart_repr, x)) + ")" 171 | else: 172 | if hasattr(x, "__call__"): 173 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." 
+ x.__name__) 174 | else: 175 | return repr(x) 176 | 177 | 178 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 179 | try: 180 | stringified_pairs = sorted( 181 | map( 182 | eval, 183 | unique( 184 | flatten( 185 | [ 186 | list( 187 | map( 188 | smart_repr, 189 | list(d.flat_params.items()) 190 | ) 191 | ) 192 | for d in exps_data 193 | ] 194 | ) 195 | ) 196 | ), 197 | key=lambda x: ( 198 | tuple("" if it is None else str(it) for it in x), 199 | ) 200 | ) 201 | except Exception as e: 202 | print(e) 203 | import ipdb 204 | ipdb.set_trace() 205 | proposals = [(k, [x[1] for x in v]) 206 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 207 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 208 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 209 | return filtered 210 | 211 | 212 | class Selector(object): 213 | def __init__(self, exps_data, filters=None, custom_filters=None): 214 | self._exps_data = exps_data 215 | if filters is None: 216 | self._filters = tuple() 217 | else: 218 | self._filters = tuple(filters) 219 | if custom_filters is None: 220 | self._custom_filters = [] 221 | else: 222 | self._custom_filters = custom_filters 223 | 224 | def where(self, k, v): 225 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 226 | 227 | def custom_filter(self, filter): 228 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 229 | 230 | def _check_exp(self, exp): 231 | # or exp.flat_params.get(k, None) is None 232 | return all( 233 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 234 | k not in exp.flat_params)) for k, v in self._filters) 235 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 236 | 237 | def extract(self): 238 | return list(filter(self._check_exp, self._exps_data)) 239 | 240 | def iextract(self): 241 | return filter(self._check_exp, self._exps_data) 242 | 243 | 244 | # Taken from plot.ly 245 | color_defaults = [ 246 | '#1f77b4', # muted blue 247 | '#ff7f0e', # safety orange 248 | '#2ca02c', # cooked asparagus green 249 | '#d62728', # brick red 250 | '#9467bd', # muted purple 251 | '#8c564b', # chestnut brown 252 | '#e377c2', # raspberry yogurt pink 253 | '#7f7f7f', # middle gray 254 | '#bcbd22', # curry yellow-green 255 | '#17becf' # blue-teal 256 | ] 257 | 258 | 259 | def hex_to_rgb(hex, opacity=1.0): 260 | if hex[0] == '#': 261 | hex = hex[1:] 262 | assert (len(hex) == 6) 263 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 264 | -------------------------------------------------------------------------------- /lab3/viskit/static/css/dropdowns-enhancement.css: -------------------------------------------------------------------------------- 1 | .dropdown-menu > li > label { 2 | display: block; 3 | padding: 3px 20px; 4 | clear: both; 5 | font-weight: normal; 6 | line-height: 1.42857143; 7 | color: #333333; 8 | white-space: nowrap; 9 | } 10 | .dropdown-menu > li > label:hover, 11 | .dropdown-menu > li > label:focus { 12 | text-decoration: none; 13 | color: #262626; 14 | background-color: #f5f5f5; 15 | } 16 | .dropdown-menu > li > input:checked ~ label, 17 | .dropdown-menu > li > input:checked ~ label:hover, 18 | .dropdown-menu > li > input:checked ~ label:focus, 19 | .dropdown-menu > .active > label, 20 | .dropdown-menu > .active > label:hover, 21 | .dropdown-menu > .active > label:focus { 22 | color: #ffffff; 23 | text-decoration: none; 
24 | outline: 0; 25 | background-color: #428bca; 26 | } 27 | .dropdown-menu > li > input[disabled] ~ label, 28 | .dropdown-menu > li > input[disabled] ~ label:hover, 29 | .dropdown-menu > li > input[disabled] ~ label:focus, 30 | .dropdown-menu > .disabled > label, 31 | .dropdown-menu > .disabled > label:hover, 32 | .dropdown-menu > .disabled > label:focus { 33 | color: #999999; 34 | } 35 | .dropdown-menu > li > input[disabled] ~ label:hover, 36 | .dropdown-menu > li > input[disabled] ~ label:focus, 37 | .dropdown-menu > .disabled > label:hover, 38 | .dropdown-menu > .disabled > label:focus { 39 | text-decoration: none; 40 | background-color: transparent; 41 | background-image: none; 42 | filter: progid:DXImageTransform.Microsoft.gradient(enabled = false); 43 | cursor: not-allowed; 44 | } 45 | .dropdown-menu > li > label { 46 | margin-bottom: 0; 47 | cursor: pointer; 48 | } 49 | .dropdown-menu > li > input[type="radio"], 50 | .dropdown-menu > li > input[type="checkbox"] { 51 | display: none; 52 | position: absolute; 53 | top: -9999em; 54 | left: -9999em; 55 | } 56 | .dropdown-menu > li > label:focus, 57 | .dropdown-menu > li > input:focus ~ label { 58 | outline: thin dotted; 59 | outline: 5px auto -webkit-focus-ring-color; 60 | outline-offset: -2px; 61 | } 62 | .dropdown-menu.pull-right { 63 | right: 0; 64 | left: auto; 65 | } 66 | .dropdown-menu.pull-top { 67 | bottom: 100%; 68 | top: auto; 69 | margin: 0 0 2px; 70 | -webkit-box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 71 | box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 72 | } 73 | .dropdown-menu.pull-center { 74 | right: 50%; 75 | left: auto; 76 | } 77 | .dropdown-menu.pull-middle { 78 | right: 100%; 79 | margin: 0 2px 0 0; 80 | box-shadow: -5px 0 10px rgba(0, 0, 0, 0.2); 81 | left: auto; 82 | } 83 | .dropdown-menu.pull-middle.pull-right { 84 | right: auto; 85 | left: 100%; 86 | margin: 0 0 0 2px; 87 | box-shadow: 5px 0 10px rgba(0, 0, 0, 0.2); 88 | } 89 | .dropdown-menu.pull-middle.pull-center { 90 | right: 50%; 91 | margin: 0; 92 | box-shadow: 0 0 10px rgba(0, 0, 0, 0.2); 93 | } 94 | .dropdown-menu.bullet { 95 | margin-top: 8px; 96 | } 97 | .dropdown-menu.bullet:before { 98 | width: 0; 99 | height: 0; 100 | content: ''; 101 | display: inline-block; 102 | position: absolute; 103 | border-color: transparent; 104 | border-style: solid; 105 | -webkit-transform: rotate(360deg); 106 | border-width: 0 7px 7px; 107 | border-bottom-color: #cccccc; 108 | border-bottom-color: rgba(0, 0, 0, 0.15); 109 | top: -7px; 110 | left: 9px; 111 | } 112 | .dropdown-menu.bullet:after { 113 | width: 0; 114 | height: 0; 115 | content: ''; 116 | display: inline-block; 117 | position: absolute; 118 | border-color: transparent; 119 | border-style: solid; 120 | -webkit-transform: rotate(360deg); 121 | border-width: 0 6px 6px; 122 | border-bottom-color: #ffffff; 123 | top: -6px; 124 | left: 10px; 125 | } 126 | .dropdown-menu.bullet.pull-right:before { 127 | left: auto; 128 | right: 9px; 129 | } 130 | .dropdown-menu.bullet.pull-right:after { 131 | left: auto; 132 | right: 10px; 133 | } 134 | .dropdown-menu.bullet.pull-top { 135 | margin-top: 0; 136 | margin-bottom: 8px; 137 | } 138 | .dropdown-menu.bullet.pull-top:before { 139 | top: auto; 140 | bottom: -7px; 141 | border-bottom-width: 0; 142 | border-top-width: 7px; 143 | border-top-color: #cccccc; 144 | border-top-color: rgba(0, 0, 0, 0.15); 145 | } 146 | .dropdown-menu.bullet.pull-top:after { 147 | top: auto; 148 | bottom: -6px; 149 | border-bottom: none; 150 | border-top-width: 6px; 151 | border-top-color: 
#ffffff; 152 | } 153 | .dropdown-menu.bullet.pull-center:before { 154 | left: auto; 155 | right: 50%; 156 | margin-right: -7px; 157 | } 158 | .dropdown-menu.bullet.pull-center:after { 159 | left: auto; 160 | right: 50%; 161 | margin-right: -6px; 162 | } 163 | .dropdown-menu.bullet.pull-middle { 164 | margin-right: 8px; 165 | } 166 | .dropdown-menu.bullet.pull-middle:before { 167 | top: 50%; 168 | left: 100%; 169 | right: auto; 170 | margin-top: -7px; 171 | border-right-width: 0; 172 | border-bottom-color: transparent; 173 | border-top-width: 7px; 174 | border-left-color: #cccccc; 175 | border-left-color: rgba(0, 0, 0, 0.15); 176 | } 177 | .dropdown-menu.bullet.pull-middle:after { 178 | top: 50%; 179 | left: 100%; 180 | right: auto; 181 | margin-top: -6px; 182 | border-right-width: 0; 183 | border-bottom-color: transparent; 184 | border-top-width: 6px; 185 | border-left-color: #ffffff; 186 | } 187 | .dropdown-menu.bullet.pull-middle.pull-right { 188 | margin-right: 0; 189 | margin-left: 8px; 190 | } 191 | .dropdown-menu.bullet.pull-middle.pull-right:before { 192 | left: -7px; 193 | border-left-width: 0; 194 | border-right-width: 7px; 195 | border-right-color: #cccccc; 196 | border-right-color: rgba(0, 0, 0, 0.15); 197 | } 198 | .dropdown-menu.bullet.pull-middle.pull-right:after { 199 | left: -6px; 200 | border-left-width: 0; 201 | border-right-width: 6px; 202 | border-right-color: #ffffff; 203 | } 204 | .dropdown-menu.bullet.pull-middle.pull-center { 205 | margin-left: 0; 206 | margin-right: 0; 207 | } 208 | .dropdown-menu.bullet.pull-middle.pull-center:before { 209 | border: none; 210 | display: none; 211 | } 212 | .dropdown-menu.bullet.pull-middle.pull-center:after { 213 | border: none; 214 | display: none; 215 | } 216 | .dropdown-submenu { 217 | position: relative; 218 | } 219 | .dropdown-submenu > .dropdown-menu { 220 | top: 0; 221 | left: 100%; 222 | margin-top: -6px; 223 | margin-left: -1px; 224 | border-top-left-radius: 0; 225 | } 226 | .dropdown-submenu > a:before { 227 | display: block; 228 | float: right; 229 | width: 0; 230 | height: 0; 231 | content: ""; 232 | margin-top: 6px; 233 | margin-right: -8px; 234 | border-width: 4px 0 4px 4px; 235 | border-style: solid; 236 | border-left-style: dashed; 237 | border-top-color: transparent; 238 | border-bottom-color: transparent; 239 | } 240 | @media (max-width: 767px) { 241 | .navbar-nav .dropdown-submenu > a:before { 242 | margin-top: 8px; 243 | border-color: inherit; 244 | border-style: solid; 245 | border-width: 4px 4px 0; 246 | border-left-color: transparent; 247 | border-right-color: transparent; 248 | } 249 | .navbar-nav .dropdown-submenu > a { 250 | padding-left: 40px; 251 | } 252 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > a, 253 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > label { 254 | padding-left: 35px; 255 | } 256 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > a, 257 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > label { 258 | padding-left: 45px; 259 | } 260 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 261 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 262 | padding-left: 55px; 263 | } 264 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu 
> li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 265 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 266 | padding-left: 65px; 267 | } 268 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 269 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 270 | padding-left: 75px; 271 | } 272 | } 273 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 274 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 275 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 276 | background-color: #e7e7e7; 277 | color: #555555; 278 | } 279 | @media (max-width: 767px) { 280 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 281 | border-top-color: #555555; 282 | } 283 | } 284 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 285 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 286 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 287 | background-color: #080808; 288 | color: #ffffff; 289 | } 290 | @media (max-width: 767px) { 291 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 292 | border-top-color: #ffffff; 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /lab3/viskit/static/js/dropdowns-enhancement.js: -------------------------------------------------------------------------------- 1 | /* ======================================================================== 2 | * Bootstrap Dropdowns Enhancement: dropdowns-enhancement.js v3.1.1 (Beta 1) 3 | * http://behigh.github.io/bootstrap_dropdowns_enhancement/ 4 | * ======================================================================== 5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) 6 | * ======================================================================== */ 7 | 8 | (function($) { 9 | "use strict"; 10 | 11 | var toggle = '[data-toggle="dropdown"]', 12 | disabled = '.disabled, :disabled', 13 | backdrop = '.dropdown-backdrop', 14 | menuClass = 'dropdown-menu', 15 | subMenuClass = 'dropdown-submenu', 16 | namespace = '.bs.dropdown.data-api', 17 | eventNamespace = '.bs.dropdown', 18 | openClass = 'open', 19 | touchSupport = 'ontouchstart' in document.documentElement, 20 | opened; 21 | 22 | 23 | function Dropdown(element) { 24 | $(element).on('click' + eventNamespace, this.toggle) 25 | } 26 | 27 | var proto = Dropdown.prototype; 28 | 29 | proto.toggle = function(event) { 30 | var $element = $(this); 31 | 32 | if ($element.is(disabled)) return; 33 | 34 | var $parent = getParent($element); 35 | var isActive = $parent.hasClass(openClass); 36 | var isSubMenu = $parent.hasClass(subMenuClass); 37 | var menuTree = isSubMenu ? 
getSubMenuParents($parent) : null; 38 | 39 | closeOpened(event, menuTree); 40 | 41 | if (!isActive) { 42 | if (!menuTree) 43 | menuTree = [$parent]; 44 | 45 | if (touchSupport && !$parent.closest('.navbar-nav').length && !menuTree[0].find(backdrop).length) { 46 | // if mobile we use a backdrop because click events don't delegate 47 | $('
').appendTo(menuTree[0]).on('click', closeOpened) 48 | } 49 | 50 | for (var i = 0, s = menuTree.length; i < s; i++) { 51 | if (!menuTree[i].hasClass(openClass)) { 52 | menuTree[i].addClass(openClass); 53 | positioning(menuTree[i].children('.' + menuClass), menuTree[i]); 54 | } 55 | } 56 | opened = menuTree[0]; 57 | } 58 | 59 | return false; 60 | }; 61 | 62 | proto.keydown = function (e) { 63 | if (!/(38|40|27)/.test(e.keyCode)) return; 64 | 65 | var $this = $(this); 66 | 67 | e.preventDefault(); 68 | e.stopPropagation(); 69 | 70 | if ($this.is('.disabled, :disabled')) return; 71 | 72 | var $parent = getParent($this); 73 | var isActive = $parent.hasClass('open'); 74 | 75 | if (!isActive || (isActive && e.keyCode == 27)) { 76 | if (e.which == 27) $parent.find(toggle).trigger('focus'); 77 | return $this.trigger('click') 78 | } 79 | 80 | var desc = ' li:not(.divider):visible a'; 81 | var desc1 = 'li:not(.divider):visible > input:not(disabled) ~ label'; 82 | var $items = $parent.find(desc1 + ', ' + '[role="menu"]' + desc + ', [role="listbox"]' + desc); 83 | 84 | if (!$items.length) return; 85 | 86 | var index = $items.index($items.filter(':focus')); 87 | 88 | if (e.keyCode == 38 && index > 0) index--; // up 89 | if (e.keyCode == 40 && index < $items.length - 1) index++; // down 90 | if (!~index) index = 0; 91 | 92 | $items.eq(index).trigger('focus') 93 | }; 94 | 95 | proto.change = function (e) { 96 | 97 | var 98 | $parent, 99 | $menu, 100 | $toggle, 101 | selector, 102 | text = '', 103 | $items; 104 | 105 | $menu = $(this).closest('.' + menuClass); 106 | 107 | $toggle = $menu.parent().find('[data-label-placement]'); 108 | 109 | if (!$toggle || !$toggle.length) { 110 | $toggle = $menu.parent().find(toggle); 111 | } 112 | 113 | if (!$toggle || !$toggle.length || $toggle.data('placeholder') === false) 114 | return; // do nothing, no control 115 | 116 | ($toggle.data('placeholder') == undefined && $toggle.data('placeholder', $.trim($toggle.text()))); 117 | text = $.data($toggle[0], 'placeholder'); 118 | 119 | $items = $menu.find('li > input:checked'); 120 | 121 | if ($items.length) { 122 | text = []; 123 | $items.each(function () { 124 | var str = $(this).parent().find('label').eq(0), 125 | label = str.find('.data-label'); 126 | 127 | if (label.length) { 128 | var p = $('
<p></p>
'); 129 | p.append(label.clone()); 130 | str = p.html(); 131 | } 132 | else { 133 | str = str.html(); 134 | } 135 | 136 | 137 | str && text.push($.trim(str)); 138 | }); 139 | 140 | text = text.length < 4 ? text.join(', ') : text.length + ' selected'; 141 | } 142 | 143 | var caret = $toggle.find('.caret'); 144 | 145 | $toggle.html(text || ' '); 146 | if (caret.length) 147 | $toggle.append(' ') && caret.appendTo($toggle); 148 | 149 | }; 150 | 151 | function positioning($menu, $control) { 152 | if ($menu.hasClass('pull-center')) { 153 | $menu.css('margin-right', $menu.outerWidth() / -2); 154 | } 155 | 156 | if ($menu.hasClass('pull-middle')) { 157 | $menu.css('margin-top', ($menu.outerHeight() / -2) - ($control.outerHeight() / 2)); 158 | } 159 | } 160 | 161 | function closeOpened(event, menuTree) { 162 | if (opened) { 163 | 164 | if (!menuTree) { 165 | menuTree = [opened]; 166 | } 167 | 168 | var parent; 169 | 170 | if (opened[0] !== menuTree[0][0]) { 171 | parent = opened; 172 | } else { 173 | parent = menuTree[menuTree.length - 1]; 174 | if (parent.parent().hasClass(menuClass)) { 175 | parent = parent.parent(); 176 | } 177 | } 178 | 179 | parent.find('.' + openClass).removeClass(openClass); 180 | 181 | if (parent.hasClass(openClass)) 182 | parent.removeClass(openClass); 183 | 184 | if (parent === opened) { 185 | opened = null; 186 | $(backdrop).remove(); 187 | } 188 | } 189 | } 190 | 191 | function getSubMenuParents($submenu) { 192 | var result = [$submenu]; 193 | var $parent; 194 | while (!$parent || $parent.hasClass(subMenuClass)) { 195 | $parent = ($parent || $submenu).parent(); 196 | if ($parent.hasClass(menuClass)) { 197 | $parent = $parent.parent(); 198 | } 199 | if ($parent.children(toggle)) { 200 | result.unshift($parent); 201 | } 202 | } 203 | return result; 204 | } 205 | 206 | function getParent($this) { 207 | var selector = $this.attr('data-target'); 208 | 209 | if (!selector) { 210 | selector = $this.attr('href'); 211 | selector = selector && /#[A-Za-z]/.test(selector) && selector.replace(/.*(?=#[^\s]*$)/, ''); //strip for ie7 212 | } 213 | 214 | var $parent = selector && $(selector); 215 | 216 | return $parent && $parent.length ? $parent : $this.parent() 217 | } 218 | 219 | // DROPDOWN PLUGIN DEFINITION 220 | // ========================== 221 | 222 | var old = $.fn.dropdown; 223 | 224 | $.fn.dropdown = function (option) { 225 | return this.each(function () { 226 | var $this = $(this); 227 | var data = $this.data('bs.dropdown'); 228 | 229 | if (!data) $this.data('bs.dropdown', (data = new Dropdown(this))); 230 | if (typeof option == 'string') data[option].call($this); 231 | }) 232 | }; 233 | 234 | $.fn.dropdown.Constructor = Dropdown; 235 | 236 | $.fn.dropdown.clearMenus = function(e) { 237 | $(backdrop).remove(); 238 | $('.' 
+ openClass + ' ' + toggle).each(function () { 239 | var $parent = getParent($(this)); 240 | var relatedTarget = { relatedTarget: this }; 241 | if (!$parent.hasClass('open')) return; 242 | $parent.trigger(e = $.Event('hide' + eventNamespace, relatedTarget)); 243 | if (e.isDefaultPrevented()) return; 244 | $parent.removeClass('open').trigger('hidden' + eventNamespace, relatedTarget); 245 | }); 246 | return this; 247 | }; 248 | 249 | 250 | // DROPDOWN NO CONFLICT 251 | // ==================== 252 | 253 | $.fn.dropdown.noConflict = function () { 254 | $.fn.dropdown = old; 255 | return this 256 | }; 257 | 258 | 259 | $(document).off(namespace) 260 | .on('click' + namespace, closeOpened) 261 | .on('click' + namespace, toggle, proto.toggle) 262 | .on('click' + namespace, '.dropdown-menu > li > input[type="checkbox"] ~ label, .dropdown-menu > li > input[type="checkbox"], .dropdown-menu.noclose > li', function (e) { 263 | e.stopPropagation() 264 | }) 265 | .on('change' + namespace, '.dropdown-menu > li > input[type="checkbox"], .dropdown-menu > li > input[type="radio"]', proto.change) 266 | .on('keydown' + namespace, toggle + ', [role="menu"], [role="listbox"]', proto.keydown) 267 | }(jQuery)); -------------------------------------------------------------------------------- /lab4/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Deep RL Bootcamp Organizers. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lab4/alg_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | from utils import * 16 | 17 | 18 | # ============================== 19 | # Shared utilities 20 | # ============================== 21 | 22 | def compute_cumulative_returns(rewards, baselines, discount): 23 | # This method builds up the cumulative sum of discounted rewards for each time step: 24 | # R[t] = sum_{t'>=t} γ^(t'-t)*r_t' 25 | # Note that we use γ^(t'-t) instead of γ^t'. This gives us a biased gradient but lower variance 26 | returns = [] 27 | # Use the last baseline prediction to back up 28 | cum_return = baselines[-1] 29 | for reward in rewards[::-1]: 30 | cum_return = cum_return * discount + reward 31 | returns.append(cum_return) 32 | return returns[::-1] 33 | 34 | 35 | def compute_advantages(rewards, baselines, discount, gae_lambda): 36 | # Given returns R_t and baselines b(s_t), compute (generalized) advantage estimate A_t 37 | deltas = rewards + discount * baselines[1:] - baselines[:-1] 38 | advs = [] 39 | cum_adv = 0 40 | multiplier = discount * gae_lambda 41 | for delta in deltas[::-1]: 42 | cum_adv = cum_adv * multiplier + delta 43 | advs.append(cum_adv) 44 | return advs[::-1] 45 | 46 | 47 | def compute_pg_vars(trajs, policy, baseline, discount, gae_lambda): 48 | """ 49 | Compute chainer variables needed for various policy gradient algorithms 50 | """ 51 | for traj in trajs: 52 | # Include the last observation here, in case the trajectory is not finished 53 | baselines = baseline.predict(np.concatenate( 54 | [traj["observations"], [traj["last_observation"]]])) 55 | if traj['finished']: 56 | # If already finished, the future cumulative rewards starting from the final state is 0 57 | baselines[-1] = 0. 58 | # This is useful when fitting baselines. It uses the baseline prediction of the last state value to perform 59 | # Bellman backup if the trajectory is not finished. 
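        # Illustrative numbers (not from the original code): with discount=0.99,
        # gae_lambda=1.0, rewards [1, 1] and baselines [0.5, 0.4, 2.0] (the trailing
        # 2.0 is the bootstrap value of an unfinished trajectory),
        # compute_cumulative_returns gives [1 + 0.99*(1 + 0.99*2.0), 1 + 0.99*2.0]
        # = [3.95, 2.98], and since gae_lambda=1.0 the advantages telescope to
        # returns - baselines[:-1] = [3.45, 2.58].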
60 | traj['returns'] = compute_cumulative_returns( 61 | traj['rewards'], baselines, discount) 62 | traj['advantages'] = compute_advantages( 63 | traj['rewards'], baselines, discount, gae_lambda) 64 | traj['baselines'] = baselines[:-1] 65 | 66 | # First, we compute a flattened list of observations, actions, and advantages 67 | all_obs = np.concatenate([traj['observations'] for traj in trajs], axis=0) 68 | all_acts = np.concatenate([traj['actions'] for traj in trajs], axis=0) 69 | all_advs = np.concatenate([traj['advantages'] for traj in trajs], axis=0) 70 | all_dists = { 71 | k: np.concatenate([traj['distributions'][k] for traj in trajs], axis=0) 72 | for k in trajs[0]['distributions'].keys() 73 | } 74 | 75 | # Normalizing the advantage values can make the algorithm more robust to reward scaling 76 | all_advs = (all_advs - np.mean(all_advs)) / (np.std(all_advs) + 1e-8) 77 | 78 | # Form chainer variables 79 | all_obs = Variable(all_obs) 80 | all_acts = Variable(all_acts) 81 | all_advs = Variable(all_advs.astype(np.float32, copy=False)) 82 | all_dists = policy.distribution.from_dict( 83 | {k: Variable(v) for k, v in all_dists.items()}) 84 | 85 | return all_obs, all_acts, all_advs, all_dists 86 | 87 | 88 | # ============================== 89 | # Helper methods for logging 90 | # ============================== 91 | 92 | def log_reward_statistics(env): 93 | # keep unwrapping until we get the monitor 94 | while not isinstance(env, gym.wrappers.Monitor): # and not isinstance() 95 | if not isinstance(env, gym.Wrapper): 96 | assert False 97 | env = env.env 98 | # env.unwrapped 99 | assert isinstance(env, gym.wrappers.Monitor) 100 | all_stats = None 101 | for _ in range(10): 102 | try: 103 | all_stats = gym.wrappers.monitoring.load_results(env.directory) 104 | except FileNotFoundError: 105 | time.sleep(1) 106 | continue 107 | if all_stats is not None: 108 | episode_rewards = all_stats['episode_rewards'] 109 | episode_lengths = all_stats['episode_lengths'] 110 | 111 | recent_episode_rewards = episode_rewards[-100:] 112 | recent_episode_lengths = episode_lengths[-100:] 113 | 114 | if len(recent_episode_rewards) > 0: 115 | logger.logkv('AverageReturn', np.mean(recent_episode_rewards)) 116 | logger.logkv('MinReturn', np.min(recent_episode_rewards)) 117 | logger.logkv('MaxReturn', np.max(recent_episode_rewards)) 118 | logger.logkv('StdReturn', np.std(recent_episode_rewards)) 119 | logger.logkv('AverageEpisodeLength', 120 | np.mean(recent_episode_lengths)) 121 | logger.logkv('MinEpisodeLength', np.min(recent_episode_lengths)) 122 | logger.logkv('MaxEpisodeLength', np.max(recent_episode_lengths)) 123 | logger.logkv('StdEpisodeLength', np.std(recent_episode_lengths)) 124 | 125 | logger.logkv('TotalNEpisodes', len(episode_rewards)) 126 | logger.logkv('TotalNSamples', np.sum(episode_lengths)) 127 | 128 | 129 | def log_baseline_statistics(trajs): 130 | # Specifically, compute the explained variance, defined as 131 | baselines = np.concatenate([traj['baselines'] for traj in trajs]) 132 | returns = np.concatenate([traj['returns'] for traj in trajs]) 133 | logger.logkv('ExplainedVariance', 134 | explained_variance_1d(baselines, returns)) 135 | 136 | 137 | def log_action_distribution_statistics(dists): 138 | with chainer.no_backprop_mode(): 139 | entropy = F.mean(dists.entropy()).data 140 | logger.logkv('Entropy', entropy) 141 | logger.logkv('Perplexity', np.exp(entropy)) 142 | if isinstance(dists, Gaussian): 143 | logger.logkv('AveragePolicyStd', F.mean( 144 | F.exp(dists.log_stds)).data) 145 | for idx in 
range(dists.log_stds.shape[-1]): 146 | logger.logkv('AveragePolicyStd[{}]'.format( 147 | idx), F.mean(F.exp(dists.log_stds[..., idx])).data) 148 | elif isinstance(dists, Categorical): 149 | probs = F.mean(F.softmax(dists.logits), axis=0).data 150 | for idx in range(len(probs)): 151 | logger.logkv('AveragePolicyProb[{}]'.format(idx), probs[idx]) 152 | -------------------------------------------------------------------------------- /lab4/algs.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | from pg import pg 16 | from trpo import trpo 17 | from a2c import a2c 18 | -------------------------------------------------------------------------------- /lab4/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 
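            # without xhost we cannot grant the container access to the host X server, so give up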
17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /lab4/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /lab4/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /lab4/experiments/run_a2c_breakout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | from algs import a2c 18 | from env_makers import EnvMaker 19 | from models import CategoricalCNNPolicy 20 | from utils import SnapshotSaver 21 | import numpy as np 22 | import os 23 | import logger 24 | 25 | log_dir = "data/local/a2c-breakout" 26 | 27 | np.random.seed(42) 28 | 29 | # Clean up existing logs 30 | os.system("rm -rf {}".format(log_dir)) 31 | 32 | with logger.session(log_dir): 33 | env_maker = EnvMaker('BreakoutNoFrameskip-v4') 34 | env = env_maker.make() 35 | policy = CategoricalCNNPolicy( 36 | env.observation_space, env.action_space, env.spec) 37 | vf = policy.create_vf() 38 | a2c( 39 | env=env, 40 | env_maker=env_maker, 41 | n_envs=16, 42 | policy=policy, 43 | vf=vf, 44 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 45 | ) 46 | -------------------------------------------------------------------------------- /lab4/experiments/run_a2c_pong.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
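Overview: trains an A2C agent with a categorical CNN policy on PongNoFrameskip-v4 using 16 parallel environments, writing logs and periodic snapshots to data/local/a2c-pong.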
13 | 14 | """ 15 | 16 | 17 | from algs import a2c 18 | from env_makers import EnvMaker 19 | from models import CategoricalCNNPolicy 20 | from utils import SnapshotSaver 21 | import numpy as np 22 | import os 23 | import logger 24 | 25 | log_dir = "data/local/a2c-pong" 26 | 27 | np.random.seed(42) 28 | 29 | # Clean up existing logs 30 | os.system("rm -rf {}".format(log_dir)) 31 | 32 | with logger.session(log_dir): 33 | env_maker = EnvMaker('PongNoFrameskip-v4') 34 | env = env_maker.make() 35 | policy = CategoricalCNNPolicy( 36 | env.observation_space, env.action_space, env.spec) 37 | vf = policy.create_vf() 38 | a2c( 39 | env=env, 40 | env_maker=env_maker, 41 | n_envs=16, 42 | policy=policy, 43 | vf=vf, 44 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 45 | ) 46 | -------------------------------------------------------------------------------- /lab4/experiments/run_a2c_pong_warm_start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | from utils import SnapshotSaver 18 | import numpy as np 19 | import os 20 | import logger 21 | import pickle 22 | 23 | log_dir = "data/local/a2c-pong-warm-start" 24 | 25 | np.random.seed(42) 26 | 27 | # Clean up existing logs 28 | os.system("rm -rf {}".format(log_dir)) 29 | 30 | with logger.session(log_dir): 31 | with open("pong_warm_start.pkl", "rb") as f: 32 | state = pickle.load(f) 33 | saver = SnapshotSaver(log_dir, interval=10) 34 | alg_state = state['alg_state'] 35 | env = alg_state['env_maker'].make() 36 | alg = state['alg'] 37 | alg(env=env, snapshot_saver=saver, **alg_state) 38 | -------------------------------------------------------------------------------- /lab4/experiments/run_pg_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import chainer 18 | 19 | from algs import pg 20 | from env_makers import EnvMaker 21 | from models import CategoricalMLPPolicy, MLPBaseline 22 | from utils import SnapshotSaver 23 | import numpy as np 24 | import os 25 | import logger 26 | 27 | log_dir = "data/local/pg-cartpole" 28 | 29 | np.random.seed(42) 30 | 31 | # Clean up existing logs 32 | os.system("rm -rf {}".format(log_dir)) 33 | 34 | with logger.session(log_dir): 35 | env_maker = EnvMaker('CartPole-v0') 36 | env = env_maker.make() 37 | policy = CategoricalMLPPolicy(observation_space=env.observation_space, action_space=env.action_space, 38 | env_spec=env.spec) 39 | baseline = MLPBaseline(observation_space=env.observation_space, action_space=env.action_space, 40 | env_spec=env.spec) 41 | pg( 42 | env=env, 43 | env_maker=env_maker, 44 | n_envs=16, 45 | policy=policy, 46 | baseline=baseline, 47 | batch_size=2000, 48 | n_iters=100, 49 | snapshot_saver=SnapshotSaver(log_dir), 50 | optimizer=chainer.optimizers.Adam(1e-2) 51 | ) 52 | -------------------------------------------------------------------------------- /lab4/experiments/run_trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | from algs import trpo 18 | from env_makers import EnvMaker 19 | from models import CategoricalMLPPolicy, MLPBaseline 20 | from utils import SnapshotSaver 21 | import numpy as np 22 | import os 23 | import logger 24 | 25 | log_dir = "data/local/trpo-cartpole" 26 | 27 | np.random.seed(42) 28 | 29 | # Clean up existing logs 30 | os.system("rm -rf {}".format(log_dir)) 31 | 32 | with logger.session(log_dir): 33 | env_maker = EnvMaker('CartPole-v0') 34 | env = env_maker.make() 35 | policy = CategoricalMLPPolicy( 36 | observation_space=env.observation_space, 37 | action_space=env.action_space, 38 | env_spec=env.spec 39 | ) 40 | baseline = MLPBaseline( 41 | observation_space=env.observation_space, 42 | action_space=env.action_space, 43 | env_spec=env.spec 44 | ) 45 | trpo( 46 | env=env, 47 | env_maker=env_maker, 48 | n_envs=16, 49 | policy=policy, 50 | baseline=baseline, 51 | batch_size=2000, 52 | n_iters=100, 53 | snapshot_saver=SnapshotSaver(log_dir) 54 | ) 55 | -------------------------------------------------------------------------------- /lab4/experiments/run_trpo_half_cheetah.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
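Overview: runs TRPO with a Gaussian MLP policy and MLP baseline (tanh, hidden sizes 256 and 64) on RoboschoolHalfCheetah-v1, with batch_size=5000 for 5000 iterations, logging to data/local/trpo-half-cheetah.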
13 | 14 | """ 15 | 16 | 17 | import chainer 18 | 19 | from algs import trpo 20 | from env_makers import EnvMaker 21 | from models import GaussianMLPPolicy, MLPBaseline 22 | from utils import SnapshotSaver 23 | import numpy as np 24 | import os 25 | import logger 26 | 27 | log_dir = "data/local/trpo-half-cheetah" 28 | 29 | np.random.seed(42) 30 | 31 | # Clean up existing logs 32 | os.system("rm -rf {}".format(log_dir)) 33 | 34 | with logger.session(log_dir): 35 | env_maker = EnvMaker('RoboschoolHalfCheetah-v1') 36 | env = env_maker.make() 37 | policy = GaussianMLPPolicy( 38 | observation_space=env.observation_space, 39 | action_space=env.action_space, 40 | env_spec=env.spec, 41 | hidden_sizes=(256, 64), 42 | hidden_nonlinearity=chainer.functions.tanh, 43 | ) 44 | baseline = MLPBaseline( 45 | observation_space=env.observation_space, 46 | action_space=env.action_space, 47 | env_spec=env.spec, 48 | hidden_sizes=(256, 64), 49 | hidden_nonlinearity=chainer.functions.tanh, 50 | ) 51 | trpo( 52 | env=env, 53 | env_maker=env_maker, 54 | n_envs=16, 55 | policy=policy, 56 | baseline=baseline, 57 | batch_size=5000, 58 | n_iters=5000, 59 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 60 | ) 61 | -------------------------------------------------------------------------------- /lab4/experiments/run_trpo_pendulum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
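Overview: runs TRPO with a Gaussian MLP policy and MLP baseline (tanh, hidden sizes 64 and 64) on Pendulum-v0, with batch_size=10000 for 100 iterations, logging to data/local/trpo-pendulum.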
13 | 14 | """ 15 | 16 | 17 | import chainer 18 | 19 | from algs import trpo 20 | from env_makers import EnvMaker 21 | from models import GaussianMLPPolicy, MLPBaseline 22 | from utils import SnapshotSaver 23 | import numpy as np 24 | import os 25 | import logger 26 | 27 | log_dir = "data/local/trpo-pendulum" 28 | 29 | np.random.seed(42) 30 | 31 | # Clean up existing logs 32 | os.system("rm -rf {}".format(log_dir)) 33 | 34 | with logger.session(log_dir): 35 | env_maker = EnvMaker('Pendulum-v0') 36 | env = env_maker.make() 37 | policy = GaussianMLPPolicy( 38 | observation_space=env.observation_space, 39 | action_space=env.action_space, 40 | env_spec=env.spec, 41 | hidden_sizes=(64, 64), 42 | hidden_nonlinearity=chainer.functions.tanh, 43 | ) 44 | baseline = MLPBaseline( 45 | observation_space=env.observation_space, 46 | action_space=env.action_space, 47 | env_spec=env.spec, 48 | hidden_sizes=(64, 64), 49 | hidden_nonlinearity=chainer.functions.tanh, 50 | ) 51 | trpo( 52 | env=env, 53 | env_maker=env_maker, 54 | n_envs=16, 55 | policy=policy, 56 | baseline=baseline, 57 | batch_size=10000, 58 | n_iters=100, 59 | snapshot_saver=SnapshotSaver(log_dir), 60 | ) 61 | -------------------------------------------------------------------------------- /lab4/findport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
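Overview: prints the first free TCP port found by scanning upwards from the given base port in steps of the given increment; the docker_run scripts use it to pick VNC and viskit ports.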
13 | 14 | """ 15 | 16 | # 17 | # Usage: findport.py 3000 100 18 | # 19 | from __future__ import print_function 20 | import socket 21 | from contextlib import closing 22 | import sys 23 | 24 | if len(sys.argv) != 3: 25 | print("Usage: {} ".format(sys.argv[0])) 26 | sys.exit(1) 27 | 28 | base = int(sys.argv[1]) 29 | increment = int(sys.argv[2]) 30 | 31 | 32 | def find_free_port(): 33 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 34 | for port in range(base, 65536, increment): 35 | try: 36 | s.bind(('', port)) 37 | return s.getsockname()[1] 38 | except socket.error: 39 | continue 40 | 41 | 42 | print(find_free_port()) 43 | -------------------------------------------------------------------------------- /lab4/lab4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab4/lab4.pdf -------------------------------------------------------------------------------- /lab4/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /lab4/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Code adapted from OpenAI Baselines: https://github.com/openai/baselines 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
13 | """ 14 | 15 | 16 | import datetime 17 | from collections import OrderedDict 18 | import os 19 | import sys 20 | import shutil 21 | import os.path as osp 22 | import json 23 | 24 | import dateutil.tz 25 | 26 | LOG_OUTPUT_FORMATS = ['stdout', 'log', 'json'] 27 | 28 | DEBUG = 10 29 | INFO = 20 30 | WARN = 30 31 | ERROR = 40 32 | 33 | DISABLED = 50 34 | 35 | 36 | class OutputFormat(object): 37 | def writekvs(self, kvs): 38 | """ 39 | Write key-value pairs 40 | """ 41 | raise NotImplementedError 42 | 43 | def writeseq(self, args): 44 | """ 45 | Write a sequence of other data (e.g. a logging message) 46 | """ 47 | pass 48 | 49 | def close(self): 50 | return 51 | 52 | 53 | class HumanOutputFormat(OutputFormat): 54 | def __init__(self, file): 55 | self.file = file 56 | 57 | def writekvs(self, kvs): 58 | # Create strings for printing 59 | key2str = OrderedDict() 60 | for (key, val) in kvs.items(): 61 | valstr = '%-8.5g' % (val,) if hasattr(val, '__float__') else val 62 | key2str[self._truncate(key)] = self._truncate(valstr) 63 | 64 | # Find max widths 65 | keywidth = max(map(len, key2str.keys())) 66 | valwidth = max(map(len, key2str.values())) 67 | 68 | # Write out the data 69 | dashes = '-' * (keywidth + valwidth + 7) 70 | lines = [dashes] 71 | for (key, val) in key2str.items(): 72 | lines.append('| %s%s | %s%s |' % ( 73 | key, 74 | ' ' * (keywidth - len(key)), 75 | val, 76 | ' ' * (valwidth - len(val)), 77 | )) 78 | lines.append(dashes) 79 | self.file.write('\n'.join(lines) + '\n') 80 | 81 | # Flush the output to the file 82 | self.file.flush() 83 | 84 | def _truncate(self, s): 85 | return s[:20] + '...' if len(s) > 23 else s 86 | 87 | def writeseq(self, args): 88 | for arg in args: 89 | self.file.write(arg) 90 | self.file.write('\n') 91 | self.file.flush() 92 | 93 | 94 | class JSONOutputFormat(OutputFormat): 95 | def __init__(self, file): 96 | self.file = file 97 | 98 | def writekvs(self, kvs): 99 | for k, v in kvs.items(): 100 | if hasattr(v, 'dtype'): 101 | v = v.tolist() 102 | kvs[k] = float(v) 103 | self.file.write(json.dumps(kvs) + '\n') 104 | self.file.flush() 105 | 106 | 107 | def make_output_format(format, ev_dir): 108 | os.makedirs(ev_dir, exist_ok=True) 109 | if format == 'stdout': 110 | return HumanOutputFormat(sys.stdout) 111 | elif format == 'log': 112 | log_file = open(osp.join(ev_dir, 'log.txt'), 'at') 113 | return HumanOutputFormat(log_file) 114 | elif format == 'json': 115 | json_file = open(osp.join(ev_dir, 'progress.json'), 'at') 116 | return JSONOutputFormat(json_file) 117 | else: 118 | raise ValueError('Unknown format specified: %s' % (format,)) 119 | 120 | 121 | # ================================================================ 122 | # API 123 | # ================================================================ 124 | 125 | 126 | def logkv(key, val): 127 | """ 128 | Log a value of some diagnostic 129 | Call this once for each diagnostic quantity, each iteration 130 | """ 131 | Logger.CURRENT.logkv(key, val) 132 | 133 | 134 | def dumpkvs(): 135 | """ 136 | Write all of the diagnostics from the current iteration 137 | 138 | level: int. (see old_logger.py docs) If the global logger level is higher than 139 | the level argument here, don't print to stdout. 
140 | """ 141 | Logger.CURRENT.dumpkvs() 142 | 143 | 144 | # for backwards compatibility 145 | record_tabular = logkv 146 | dump_tabular = dumpkvs 147 | 148 | 149 | def log(*args, level=INFO): 150 | """ 151 | Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). 152 | """ 153 | Logger.CURRENT.log(*args, level=level) 154 | 155 | 156 | def debug(*args): 157 | log(*args, level=DEBUG) 158 | 159 | 160 | def info(*args): 161 | log(*args, level=INFO) 162 | 163 | 164 | def warn(*args): 165 | log(*args, level=WARN) 166 | 167 | 168 | def error(*args): 169 | log(*args, level=ERROR) 170 | 171 | 172 | def set_level(level): 173 | """ 174 | Set logging threshold on current logger. 175 | """ 176 | Logger.CURRENT.set_level(level) 177 | 178 | 179 | def get_level(): 180 | """ 181 | Set logging threshold on current logger. 182 | """ 183 | return Logger.CURRENT.level 184 | 185 | 186 | def get_dir(): 187 | """ 188 | Get directory that log files are being written to. 189 | will be None if there is no output directory (i.e., if you didn't call start) 190 | """ 191 | return Logger.CURRENT.get_dir() 192 | 193 | 194 | def get_expt_dir(): 195 | sys.stderr.write( 196 | "get_expt_dir() is Deprecated. Switch to get_dir() [%s]\n" % (get_dir(),)) 197 | return get_dir() 198 | 199 | 200 | # ================================================================ 201 | # Backend 202 | # ================================================================ 203 | 204 | 205 | class Logger(object): 206 | # A logger with no output files. (See right below class definition) 207 | DEFAULT = None 208 | # So that you can still log to the terminal without setting up any output files 209 | CURRENT = None # Current logger being used by the free functions above 210 | 211 | def __init__(self, dir, output_formats): 212 | self.name2val = OrderedDict() # values this iteration 213 | self.level = INFO 214 | self.dir = dir 215 | self.output_formats = output_formats 216 | 217 | # Logging API, forwarded 218 | # ---------------------------------------- 219 | def logkv(self, key, val): 220 | self.name2val[key] = val 221 | 222 | def dumpkvs(self): 223 | for fmt in self.output_formats: 224 | fmt.writekvs(self.name2val) 225 | self.name2val.clear() 226 | 227 | def log(self, *args, level=INFO): 228 | now = datetime.datetime.now(dateutil.tz.tzlocal()) 229 | timestamp = now.strftime('[%Y-%m-%d %H:%M:%S.%f %Z] ') 230 | if self.level <= level: 231 | self._do_log((timestamp,) + args) 232 | 233 | # Configuration 234 | # ---------------------------------------- 235 | def set_level(self, level): 236 | self.level = level 237 | 238 | def get_dir(self): 239 | return self.dir 240 | 241 | def close(self): 242 | for fmt in self.output_formats: 243 | fmt.close() 244 | 245 | # Misc 246 | # ---------------------------------------- 247 | def _do_log(self, args): 248 | for fmt in self.output_formats: 249 | fmt.writeseq(args) 250 | 251 | 252 | # ================================================================ 253 | 254 | Logger.DEFAULT = Logger( 255 | output_formats=[HumanOutputFormat(sys.stdout)], dir=None) 256 | Logger.CURRENT = Logger.DEFAULT 257 | 258 | 259 | class session(object): 260 | """ 261 | Context manager that sets up the loggers for an experiment. 
262 | """ 263 | 264 | CURRENT = None # Set to a LoggerContext object using enter/exit or context manager 265 | 266 | def __init__(self, dir, format_strs=None): 267 | self.dir = dir 268 | if format_strs is None: 269 | format_strs = LOG_OUTPUT_FORMATS 270 | output_formats = [make_output_format(f, dir) for f in format_strs] 271 | Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) 272 | 273 | def __enter__(self): 274 | os.makedirs(self.evaluation_dir(), exist_ok=True) 275 | output_formats = [make_output_format( 276 | f, self.evaluation_dir()) for f in LOG_OUTPUT_FORMATS] 277 | Logger.CURRENT = Logger(dir=self.dir, output_formats=output_formats) 278 | 279 | def __exit__(self, *args): 280 | Logger.CURRENT.close() 281 | Logger.CURRENT = Logger.DEFAULT 282 | 283 | def evaluation_dir(self): 284 | return self.dir 285 | 286 | 287 | # ================================================================ 288 | 289 | 290 | def _demo(): 291 | info("hi") 292 | debug("shouldn't appear") 293 | set_level(DEBUG) 294 | debug("should appear") 295 | dir = "/tmp/testlogging" 296 | if os.path.exists(dir): 297 | shutil.rmtree(dir) 298 | with session(dir=dir): 299 | record_tabular("a", 3) 300 | record_tabular("b", 2.5) 301 | dump_tabular() 302 | record_tabular("b", -2.5) 303 | record_tabular("a", 5.5) 304 | dump_tabular() 305 | info("^^^ should see a = 5.5") 306 | 307 | record_tabular("b", -2.5) 308 | dump_tabular() 309 | 310 | record_tabular("a", "longasslongasslongasslongasslongasslongassvalue") 311 | dump_tabular() 312 | 313 | 314 | if __name__ == "__main__": 315 | _demo() 316 | -------------------------------------------------------------------------------- /lab4/pg.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | """ 12 | 13 | from alg_utils import * 14 | from simplepg.simple_utils import test_once, nprs 15 | import tests.pg_tests 16 | 17 | 18 | def pg(env, env_maker, policy, baseline, n_envs=mp.cpu_count(), last_iter=-1, n_iters=100, batch_size=1000, 19 | optimizer=chainer.optimizers.Adam(), discount=0.99, gae_lambda=0.97, snapshot_saver=None): 20 | """ 21 | This method implements policy gradient algorithm. 
22 | :param env: An environment instance, which should have the same class as what env_maker.make() returns. 23 | :param env_maker: An object such that calling env_maker.make() will generate a new environment. 24 | :param policy: A stochastic policy which we will be optimizing. 25 | :param baseline: A baseline used for variance reduction and estimating future returns for unfinished trajectories. 26 | :param n_envs: Number of environments running simultaneously. 27 | :param last_iter: The index of the last iteration. This is normally -1 when starting afresh, but may be different when 28 | loaded from a snapshot. 29 | :param n_iters: The total number of iterations to run. 30 | :param batch_size: The number of samples used per iteration. 31 | :param optimizer: A Chainer optimizer instance. By default we use the Adam algorithm with learning rate 1e-3. 32 | :param discount: Discount factor. 33 | :param gae_lambda: Lambda parameter used for generalized advantage estimation. 34 | :param snapshot_saver: An object for saving snapshots. 35 | """ 36 | 37 | if getattr(optimizer, 'target', None) is not policy: 38 | optimizer.setup(policy) 39 | 40 | logger.info("Starting env pool") 41 | with EnvPool(env_maker, n_envs=n_envs) as env_pool: 42 | for iter in range(last_iter + 1, n_iters): 43 | logger.info("Starting iteration {}".format(iter)) 44 | logger.logkv('Iteration', iter) 45 | 46 | logger.info("Start collecting samples") 47 | trajs = parallel_collect_samples(env_pool, policy, batch_size) 48 | 49 | logger.info("Computing input variables for policy optimization") 50 | all_obs, all_acts, all_advs, _ = compute_pg_vars( 51 | trajs, policy, baseline, discount, gae_lambda 52 | ) 53 | 54 | # Begin policy update 55 | 56 | # Now, you need to implement the computation of the policy gradient 57 | # The policy gradient is given by -1/T \sum_t \nabla_\theta(log(p_\theta(a_t|s_t))) * A_t 58 | # Note the negative sign in front, since optimizers are most often minimizing a loss rather than maximizing an objective 59 | # This is the same as \nabla_\theta(-1/T \sum_t log(p_\theta(a_t|s_t)) * A_t) = \nabla_\theta(L), where L is the surrogate loss term 60 | 61 | logger.info("Computing policy gradient") 62 | 63 | # Methods that may be useful: 64 | # - `dists.logli(actions)' returns the log probability of the actions under the distribution `dists'. 65 | # This method returns a chainer variable.
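# Illustrative numeric sketch (not part of the original lab code): with plain
# NumPy the surrogate loss for a batch of T samples would read
#     L = -np.mean(logp_actions * advantages)
# For example, logp_actions = np.array([-1.2, -0.7]) and
# advantages = np.array([2.0, -1.0]) give L = -np.mean([-2.4, 0.7]) = 0.85,
# and minimizing L follows the negated policy gradient written above.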
66 | 67 | dists = policy.compute_dists(all_obs) 68 | 69 | def compute_surr_loss(dists, all_acts, all_advs): 70 | """ 71 | :param dists: An instance of subclass of Distribution 72 | :param all_acts: A chainer variable, which should be a matrix of size N * |A| 73 | :param all_advs: A chainer variable, which should be a vector of size N 74 | :return: A chainer variable, which should be a scalar 75 | """ 76 | "*** YOUR CODE HERE ***" 77 | return -F.mean(dists.logli(all_acts) * all_advs) 78 | 79 | test_once(compute_surr_loss) 80 | 81 | surr_loss = compute_surr_loss(dists, all_acts, all_advs) 82 | 83 | # reset gradients stored in the policy parameters 84 | policy.cleargrads() 85 | surr_loss.backward() 86 | 87 | # apply the computed gradient 88 | optimizer.update() 89 | 90 | # Update baseline 91 | logger.info("Updating baseline") 92 | baseline.update(trajs) 93 | 94 | # log statistics 95 | logger.info("Computing logging information") 96 | logger.logkv('SurrLoss', surr_loss.data) 97 | log_action_distribution_statistics(dists) 98 | log_reward_statistics(env) 99 | log_baseline_statistics(trajs) 100 | logger.dumpkvs() 101 | 102 | if snapshot_saver is not None: 103 | logger.info("Saving snapshot") 104 | snapshot_saver.save_state( 105 | iter, 106 | dict( 107 | alg=pg, 108 | alg_state=dict( 109 | env_maker=env_maker, 110 | policy=policy, 111 | baseline=baseline, 112 | n_envs=n_envs, 113 | last_iter=iter, 114 | n_iters=n_iters, 115 | batch_size=batch_size, 116 | optimizer=optimizer, 117 | discount=discount, 118 | gae_lambda=gae_lambda 119 | ) 120 | ) 121 | ) 122 | -------------------------------------------------------------------------------- /lab4/pong_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab4/pong_warm_start.pkl -------------------------------------------------------------------------------- /lab4/scripts/resume_training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
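The pg() function above is driven by an experiment script in the same way as trpo(); the sketch below mirrors experiments/run_trpo_pendulum.py but calls the policy gradient algorithm instead. It is assembled from pieces shown in this listing (EnvMaker, GaussianMLPPolicy, MLPBaseline, SnapshotSaver, logger.session); the log directory name and the hyperparameters batch_size=2000, n_iters=100 are illustrative choices, not taken from the bundled run_pg_cartpole.py.

import chainer
import numpy as np

import logger
from env_makers import EnvMaker
from models import GaussianMLPPolicy, MLPBaseline
from pg import pg  # imported straight from pg.py for this sketch
from utils import SnapshotSaver

log_dir = "data/local/pg-pendulum"
np.random.seed(42)

with logger.session(log_dir):
    env_maker = EnvMaker('Pendulum-v0')
    env = env_maker.make()
    policy = GaussianMLPPolicy(
        observation_space=env.observation_space,
        action_space=env.action_space,
        env_spec=env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=chainer.functions.tanh,
    )
    baseline = MLPBaseline(
        observation_space=env.observation_space,
        action_space=env.action_space,
        env_spec=env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=chainer.functions.tanh,
    )
    pg(
        env=env,
        env_maker=env_maker,
        n_envs=16,
        policy=policy,
        baseline=baseline,
        batch_size=2000,
        n_iters=100,
        snapshot_saver=SnapshotSaver(log_dir),
    )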
13 | 14 | """ 15 | 16 | 17 | from utils import SnapshotSaver 18 | import click 19 | import logger 20 | 21 | 22 | @click.command() 23 | @click.argument("dir") # , "Directory which contains snapshot files") 24 | @click.option("--interval", help="Interval between saving snapshots", type=int, default=10) 25 | def main(dir, interval): 26 | with logger.session(dir): 27 | saver = SnapshotSaver(dir, interval=interval) 28 | state = saver.get_state() 29 | alg_state = state['alg_state'] 30 | env = alg_state['env_maker'].make() 31 | alg = state['alg'] 32 | alg(env=env, snapshot_saver=saver, **alg_state) 33 | 34 | 35 | if __name__ == "__main__": 36 | main() 37 | -------------------------------------------------------------------------------- /lab4/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /lab4/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | from utils import SnapshotSaver 18 | import click 19 | import time 20 | import os 21 | 22 | 23 | @click.command() 24 | @click.argument("dir") 25 | def main(dir): 26 | env = None 27 | while True: 28 | saver = SnapshotSaver(dir) 29 | state = saver.get_state() 30 | if state is None: 31 | time.sleep(1) 32 | continue 33 | alg_state = state['alg_state'] 34 | if env is None: 35 | env = alg_state['env_maker'].make() 36 | policy = alg_state['policy'] 37 | ob = env.reset() 38 | done = False 39 | while not done: 40 | action, _ = policy.get_action(ob) 41 | ob, _, done, _ = env.step(action) 42 | env.render() 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /lab4/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
13 | 14 | """ 15 | 16 | 17 | def main(): 18 | import roboschool 19 | import gym 20 | import chainer 21 | env = gym.make('CartPole-v0') 22 | env.reset() 23 | env.step(env.action_space.sample()) 24 | env = gym.make('RoboschoolHalfCheetah-v1') 25 | env.reset() 26 | env.step(env.action_space.sample()) 27 | print("Your environment has been successfully set up!") 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /lab4/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | from gym import Env 16 | from gym.envs.registration import register 17 | from gym.utils import seeding 18 | from gym import spaces 19 | from gym.envs.classic_control.cartpole import CartPoleEnv 20 | import numpy as np 21 | 22 | 23 | class PointEnv(Env): 24 | metadata = { 25 | 'render.modes': ['human', 'rgb_array'], 26 | 'video.frames_per_second': 50 27 | } 28 | 29 | def __init__(self): 30 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 31 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 32 | 33 | self._seed() 34 | self.viewer = None 35 | self.state = None 36 | 37 | def _seed(self, seed=None): 38 | self.np_random, seed = seeding.np_random(seed) 39 | return [seed] 40 | 41 | def _step(self, action): 42 | action = np.clip(action, -0.025, 0.025) 43 | self.state = np.clip(self.state + action, -1, 1) 44 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 45 | 46 | def _reset(self): 47 | while True: 48 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 49 | # Sample states that are far away 50 | if np.linalg.norm(self.state) > 0.9: 51 | break 52 | return np.array(self.state) 53 | 54 | # def _render(self, mode='human', close=False): 55 | # pass 56 | 57 | def _render(self, mode='human', close=False): 58 | if close: 59 | if self.viewer is not None: 60 | self.viewer.close() 61 | self.viewer = None 62 | return 63 | 64 | screen_width = 800 65 | screen_height = 800 66 | 67 | if self.viewer is None: 68 | from gym.envs.classic_control import rendering 69 | self.viewer = rendering.Viewer(screen_width, screen_height) 70 | 71 | agent = rendering.make_circle( 72 | min(screen_height, screen_width) * 0.03) 73 | origin = rendering.make_circle( 74 | min(screen_height, screen_width) * 0.03) 75 | trans = rendering.Transform(translation=(0, 0)) 76 | agent.add_attr(trans) 77 | self.trans = trans 78 | agent.set_color(1, 0, 0) 79 | origin.set_color(0, 0, 0) 80 | origin.add_attr(rendering.Transform( 81 | translation=(screen_width // 2, screen_height // 2))) 82 | self.viewer.add_geom(agent) 83 | self.viewer.add_geom(origin) 84 | 85 | # self.trans.set_translation(0, 0) 86 | self.trans.set_translation( 87 | (self.state[0] + 1) / 2 * screen_width, 88 | (self.state[1] + 1) / 2 * screen_height, 89 | ) 90 | 91 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 92 | 93 | 94 | register( 95 | 'Point-v0', 96 | entry_point='simplepg.point_env:PointEnv', 97 | timestep_limit=40, 98 | ) 99 | -------------------------------------------------------------------------------- /lab4/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | 17 | import click 18 | import numpy as np 19 | import gym 20 | 21 | from simplepg.simple_utils import include_bias, weighted_sample 22 | 23 | 24 | def point_get_action(theta, ob, rng=np.random): 25 | ob_1 = include_bias(ob) 26 | mean = theta.dot(ob_1) 27 | return rng.normal(loc=mean, scale=1.) 28 | 29 | 30 | def cartpole_get_action(theta, ob, rng=np.random): 31 | ob_1 = include_bias(ob) 32 | logits = ob_1.dot(theta.T) 33 | return weighted_sample(logits, rng=rng) 34 | 35 | 36 | @click.command() 37 | @click.argument("env_id", type=str, default="Point-v0") 38 | def main(env_id): 39 | # Register the environment 40 | rng = np.random.RandomState(42) 41 | 42 | if env_id == 'CartPole-v0': 43 | env = gym.make('CartPole-v0') 44 | get_action = cartpole_get_action 45 | obs_dim = env.observation_space.shape[0] 46 | action_dim = env.action_space.n 47 | elif env_id == 'Point-v0': 48 | from simplepg import point_env 49 | env = gym.make('Point-v0') 50 | get_action = point_get_action 51 | obs_dim = env.observation_space.shape[0] 52 | action_dim = env.action_space.shape[0] 53 | else: 54 | raise ValueError( 55 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 56 | 57 | env.seed(42) 58 | 59 | # Initialize parameters 60 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 61 | 62 | while True: 63 | ob = env.reset() 64 | done = False 65 | # Only render the first trajectory 66 | # Collect a new trajectory 67 | rewards = [] 68 | while not done: 69 | action = get_action(theta, ob, rng=rng) 70 | next_ob, rew, done, _ = env.step(action) 71 | ob = next_ob 72 | env.render() 73 | rewards.append(rew) 74 | 75 | print("Episode reward: %.2f" % np.sum(rewards)) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /lab4/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 
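To make the linear policies above concrete, here is a small illustrative sketch (toy numbers, not from the lab) of how point_get_action turns a parameter matrix theta of shape (action_dim, obs_dim + 1) into a 2-D Gaussian action:

import numpy as np
from simplepg.simple_utils import include_bias

rng = np.random.RandomState(0)
theta = rng.normal(scale=0.01, size=(2, 3))   # action_dim=2, obs_dim=2, plus one bias column
ob = np.array([0.5, -0.25])
mean = theta.dot(include_bias(ob))            # include_bias appends a constant 1.0 feature
action = rng.normal(loc=mean, scale=1.0)      # unit-variance Gaussian around the mean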
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | import numpy as np 16 | import scipy.special 17 | import chainer 18 | 19 | 20 | # Compute gradient approximately using finite difference 21 | def numerical_grad(f, x, eps=1e-8): 22 | grad = np.zeros_like(x) 23 | for i in range(len(x)): 24 | xplus = np.array(x) 25 | xplus[i] += eps 26 | fplus = f(xplus) 27 | xminus = np.array(x) 28 | xminus[i] -= eps 29 | fminus = f(xminus) 30 | grad[i] = (fplus - fminus) / (2 * eps) 31 | return grad 32 | 33 | 34 | def gradient_check(f, g, x): 35 | # Test the implementation of g(x) = df/dx 36 | # Perform numerical differentiation and test it 37 | g_num = numerical_grad(f, x) 38 | g_test = g(x) 39 | try: 40 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 41 | print("Gradient check passed!") 42 | except AssertionError as e: 43 | print(e) 44 | print("Error: Gradient check didn't pass!") 45 | exit() 46 | 47 | 48 | def log_softmax(logits): 49 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 50 | 51 | 52 | def softmax(logits): 53 | x = logits 54 | x = x - np.max(x, axis=-1, keepdims=True) 55 | x = np.exp(x) 56 | return x / np.sum(x, axis=-1, keepdims=True) 57 | 58 | 59 | def weighted_sample(logits, rng=np.random): 60 | weights = softmax(logits) 61 | return min( 62 | int(np.sum(rng.uniform() > np.cumsum(weights))), 63 | len(weights) - 1 64 | ) 65 | 66 | 67 | def include_bias(x): 68 | # Add a constant term (1.0) to each entry in x 69 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 70 | 71 | 72 | _tested = set() 73 | _tests = dict() 74 | 75 | nprs = np.random.RandomState 76 | 77 | 78 | def register_test(fn_name, kwargs, desired_output=None): 79 | assert fn_name not in _tests 80 | _tests[fn_name] = (kwargs, desired_output) 81 | 82 | 83 | def assert_allclose(a, b): 84 | if isinstance(a, (np.ndarray, float, int)): 85 | np.testing.assert_allclose(a, b, rtol=1e-5) 86 | elif isinstance(a, (tuple, list)): 87 | assert isinstance(b, (tuple, list)) 88 | assert len(a) == len(b) 89 | for a_i, b_i in zip(a, b): 90 | assert_allclose(a_i, b_i) 91 | elif isinstance(a, chainer.Variable): 92 | assert isinstance(b, chainer.Variable) 93 | assert_allclose(a.data, b.data) 94 | else: 95 | raise NotImplementedError 96 | 97 | 98 | def test_once(fn): 99 | module = fn.__module__ 100 | name = fn.__name__ 101 | key = module + "." + name 102 | if key in _tested: 103 | return 104 | assert key in _tests, "Test for %s not found!" 
% key 105 | kwargs, desired_output = _tests[key] 106 | _tested.add(key) 107 | 108 | if callable(kwargs): 109 | kwargs = kwargs() 110 | 111 | if callable(desired_output): 112 | desired_output = desired_output() 113 | 114 | if desired_output is None: 115 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 116 | exit() 117 | else: 118 | try: 119 | output = fn(**kwargs) 120 | assert_allclose(desired_output, output) 121 | print("Test for %s passed!" % key) 122 | except AssertionError as e: 123 | print(e) 124 | print("Error: test for %s didn't pass!" % key) 125 | exit() 126 | -------------------------------------------------------------------------------- /lab4/tests/a2c_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
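As a quick illustration of the finite-difference utilities defined in simplepg/simple_utils.py above, the following sketch (a toy function, not part of the labs) checks an analytic gradient against numerical_grad via gradient_check:

import numpy as np
from simplepg.simple_utils import gradient_check

def f(x):
    return np.sum(x ** 2)   # f(x) = sum_i x_i^2

def g(x):
    return 2 * x            # its analytic gradient

gradient_check(f, g, np.array([0.3, -1.2, 2.0]))  # prints "Gradient check passed!"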
11 | 12 | """ 13 | 14 | 15 | from simplepg.simple_utils import register_test, nprs 16 | import numpy as np 17 | from chainer import Variable 18 | 19 | register_test( 20 | "a2c.compute_returns_advantages", 21 | kwargs=lambda: dict( 22 | rewards=nprs(0).uniform(size=(5, 2)), 23 | dones=nprs(1).choice([True, False], size=(5, 2)), 24 | values=nprs(2).uniform(size=(5, 2)), 25 | next_values=nprs(3).uniform(size=(2,)), 26 | discount=0.99, 27 | ), 28 | desired_output=lambda: ( 29 | np.array([[1.14554925, 1.25462372], 30 | [0.60276338, 0.54488318], 31 | [2.33579066, 1.90456042], 32 | [1.93145037, 1.2713801], 33 | [1.50895268, 0.38344152]]), 34 | np.array([[0.70955434, 1.22869749], 35 | [0.0531009, 0.10956079], 36 | [1.91542286, 1.5742256], 37 | [1.72680173, 0.65210914], 38 | [1.20929801, 0.11661424]]) 39 | ) 40 | ) 41 | 42 | register_test( 43 | "a2c.compute_total_loss", 44 | kwargs=lambda: dict( 45 | logli=Variable(nprs(0).uniform(size=(10,)).astype(np.float32)), 46 | all_advs=Variable(nprs(1).uniform(size=(10,)).astype(np.float32)), 47 | ent_coeff=nprs(2).uniform(), 48 | ent=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 49 | vf_loss_coeff=nprs(4).uniform(), 50 | all_returns=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 51 | all_values=Variable(nprs(6).uniform(size=(10,)).astype(np.float32)), 52 | ), 53 | desired_output=lambda: ( 54 | Variable(np.array(-0.4047563076019287, dtype=np.float32)), 55 | Variable(np.array(0.22883716225624084, dtype=np.float32)), 56 | Variable(np.array(-0.1834639459848404, dtype=np.float32)) 57 | ) 58 | ) 59 | -------------------------------------------------------------------------------- /lab4/tests/pg_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | from chainer import Variable 16 | 17 | from simplepg.simple_utils import register_test, nprs 18 | from utils import Gaussian 19 | import numpy as np 20 | 21 | register_test( 22 | "pg.compute_surr_loss", 23 | kwargs=lambda: dict( 24 | dists=Gaussian( 25 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 26 | log_stds=Variable(nprs(1).uniform( 27 | size=(10, 3)).astype(np.float32)), 28 | ), 29 | all_acts=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 30 | all_advs=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 31 | ), 32 | desired_output=lambda: Variable( 33 | np.array(1.9201269149780273, dtype=np.float32)) 34 | ) 35 | -------------------------------------------------------------------------------- /lab4/tests/simplepg_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | 12 | """ 13 | 14 | 15 | from simplepg.simple_utils import register_test, nprs 16 | import numpy as np 17 | 18 | register_test( 19 | "__main__.compute_update", 20 | kwargs=lambda: dict( 21 | discount=0.99, 22 | R_tplus1=1.0, 23 | theta=nprs(0).uniform(size=(2, 2)), 24 | s_t=nprs(1).uniform(size=(1,)), 25 | a_t=nprs(2).choice(2), 26 | r_t=nprs(3).uniform(), 27 | b_t=nprs(4).uniform(), 28 | get_grad_logp_action=lambda theta, *_: theta * 2 29 | ), 30 | desired_output=lambda: ( 31 | 1.5407979025745755, 32 | np.array([[0.62978332, 0.82070564], [0.69169275, 0.62527314]]) 33 | ) 34 | ) 35 | 36 | register_test( 37 | "__main__.compute_baselines", 38 | kwargs=lambda: dict( 39 | all_returns=[ 40 | nprs(0).uniform(size=(10,)), 41 | nprs(1).uniform(size=(20,)), 42 | [], 43 | ], 44 | ), 45 | desired_output=lambda: np.array([0.61576628, 0.36728075, 0.]) 46 | ) 47 | 48 | register_test( 49 | "__main__.compute_fisher_matrix", 50 | kwargs=lambda: dict( 51 | theta=nprs(1).uniform(size=(2, 2)), 52 | get_grad_logp_action=lambda theta, ob, action: np.exp( 53 | theta) * np.linalg.norm(action), 54 | all_observations=list(nprs(2).uniform(size=(5, 1))), 55 | all_actions=list(nprs(3).choice(2, size=(5,))), 56 | ), 57 | desired_output=lambda: np.array([[0.92104469, 1.24739299, 0.60704379, 0.82124306], 58 | [1.24739299, 1.68937435, 59 | 0.82213401, 1.11222925], 60 | [0.60704379, 0.82213401, 61 | 0.40009151, 0.54126635], 62 | [0.82124306, 1.11222925, 0.54126635, 0.73225564]]) 63 | ) 64 | 65 | register_test( 66 | "__main__.compute_natural_gradient", 67 | kwargs=lambda: dict( 68 | F=nprs(0).uniform(size=(4, 4)), 69 | grad=nprs(1).uniform(size=(2, 2)), 70 | reg=1e-3, 71 | ), 72 | desired_output=lambda: np.array( 73 | [[-0.44691565, 0.5477328], [-0.20366472, 0.72267091]]) 74 | ) 75 | 76 | register_test( 77 | "__main__.compute_step_size", 78 | kwargs=lambda: dict( 79 | F=nprs(0).uniform(size=(2, 2)), 80 | natural_grad=nprs(1).uniform(size=(1, 2)), 81 | natural_step_size=1e-2, 82 | ), 83 | desired_output=lambda: 0.1607407366467048, 84 | ) 85 | -------------------------------------------------------------------------------- /lab4/tests/trpo_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | from simplepg.simple_utils import register_test, nprs 16 | import numpy as np 17 | from chainer import Variable 18 | 19 | from utils import Gaussian 20 | 21 | register_test( 22 | "trpo.compute_surr_loss", 23 | kwargs=lambda: dict( 24 | old_dists=Gaussian( 25 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 26 | log_stds=Variable(nprs(1).uniform( 27 | size=(10, 3)).astype(np.float32)), 28 | ), 29 | new_dists=Gaussian( 30 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 31 | log_stds=Variable(nprs(3).uniform( 32 | size=(10, 3)).astype(np.float32)), 33 | ), 34 | all_acts=Variable(nprs(4).uniform(size=(10, 3)).astype(np.float32)), 35 | all_advs=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 36 | ), 37 | desired_output=lambda: Variable( 38 | np.array(-0.5629823207855225, dtype=np.float32)) 39 | ) 40 | 41 | register_test( 42 | "trpo.compute_kl", 43 | kwargs=lambda: dict( 44 | old_dists=Gaussian( 45 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 46 | log_stds=Variable(nprs(1).uniform( 47 | size=(10, 3)).astype(np.float32)), 48 | ), 49 | new_dists=Gaussian( 50 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 51 | log_stds=Variable(nprs(3).uniform( 52 | size=(10, 3)).astype(np.float32)), 53 | ), 54 | ), 55 | desired_output=lambda: Variable( 56 | np.array(0.5306503176689148, dtype=np.float32)) 57 | ) 58 | -------------------------------------------------------------------------------- /lab4/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/lab4/viskit/__init__.py -------------------------------------------------------------------------------- /lab4/viskit/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
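The simplepg tests registered further above fix the natural gradient computations numerically. The NumPy sketch below is consistent with them: the step-size formula reproduces the registered desired value for compute_step_size, while the damped linear solve in compute_natural_gradient is an assumption about how the reg parameter enters (it is not the lab's reference implementation):

import numpy as np

def compute_natural_gradient(F, grad, reg=1e-3):
    # Damped solve (F + reg*I) x = grad, reshaped back to the parameter shape.
    flat = np.linalg.solve(F + reg * np.eye(F.shape[0]), grad.flatten())
    return flat.reshape(grad.shape)

def compute_step_size(F, natural_grad, natural_step_size):
    # Largest beta such that the quadratic KL estimate
    # 0.5 * (beta*g)^T F (beta*g) equals natural_step_size.
    g = natural_grad.flatten()
    return np.sqrt(2.0 * natural_step_size / g.dot(F).dot(g))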
11 | """ 12 | 13 | 14 | import csv 15 | import itertools 16 | import json 17 | import os 18 | 19 | import numpy as np 20 | 21 | 22 | # from sandbox.rocky.utils.py_utils import AttrDict 23 | 24 | class AttrDict(dict): 25 | def __init__(self, *args, **kwargs): 26 | super(AttrDict, self).__init__(*args, **kwargs) 27 | self.__dict__ = self 28 | 29 | 30 | def unique(l): 31 | return list(set(l)) 32 | 33 | 34 | def flatten(l): 35 | return [item for sublist in l for item in sublist] 36 | 37 | 38 | def load_progress(progress_json_path, verbose=True): 39 | if verbose: 40 | print("Reading %s" % progress_json_path) 41 | entries = dict() 42 | rows = [] 43 | with open(progress_json_path, 'r') as f: 44 | lines = f.read().split('\n') 45 | for line in lines: 46 | if len(line) > 0: 47 | row = json.loads(line) 48 | rows.append(row) 49 | all_keys = set(k for row in rows for k in row.keys()) 50 | for k in all_keys: 51 | if k not in entries: 52 | entries[k] = [] 53 | for row in rows: 54 | if k in row: 55 | v = row[k] 56 | try: 57 | entries[k].append(float(v)) 58 | except: 59 | entries[k].append(np.nan) 60 | else: 61 | entries[k].append(np.nan) 62 | 63 | # entries[key] = [row.get(key, np.nan) for row in rows] 64 | # added_keys = set() 65 | # for k, v in row.items(): 66 | # if k not in entries: 67 | # entries[k] = [] 68 | # try: 69 | # entries[k].append(float(v)) 70 | # except: 71 | # entries[k].append(0.) 72 | # added_keys.add(k) 73 | # for k in entries.keys(): 74 | # if k not in added_keys: 75 | # entries[k].append(np.nan) 76 | entries = dict([(k, np.array(v)) for k, v in entries.items()]) 77 | return entries 78 | 79 | 80 | def flatten_dict(d): 81 | flat_params = dict() 82 | for k, v in d.items(): 83 | if isinstance(v, dict): 84 | v = flatten_dict(v) 85 | for subk, subv in flatten_dict(v).items(): 86 | flat_params[k + "." 
+ subk] = subv 87 | else: 88 | flat_params[k] = v 89 | return flat_params 90 | 91 | 92 | def load_params(params_json_path): 93 | with open(params_json_path, 'r') as f: 94 | data = json.loads(f.read()) 95 | if "args_data" in data: 96 | del data["args_data"] 97 | if "exp_name" not in data: 98 | data["exp_name"] = params_json_path.split("/")[-2] 99 | return data 100 | 101 | 102 | def lookup(d, keys): 103 | if not isinstance(keys, list): 104 | keys = keys.split(".") 105 | for k in keys: 106 | if hasattr(d, "__getitem__"): 107 | if k in d: 108 | d = d[k] 109 | else: 110 | return None 111 | else: 112 | return None 113 | return d 114 | 115 | 116 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True): 117 | if isinstance(exp_folder_paths, str): 118 | exp_folder_paths = [exp_folder_paths] 119 | exps = [] 120 | for exp_folder_path in exp_folder_paths: 121 | exps += [x[0] for x in os.walk(exp_folder_path)] 122 | if verbose: 123 | print("finished walking exp folders") 124 | exps_data = [] 125 | for exp in exps: 126 | try: 127 | exp_path = exp 128 | variant_json_path = os.path.join(exp_path, "variant.json") 129 | progress_json_path = os.path.join(exp_path, "progress.json") 130 | progress = load_progress(progress_json_path, verbose=verbose) 131 | try: 132 | params = load_params(variant_json_path) 133 | except IOError: 134 | params = dict(exp_name="experiment") 135 | exps_data.append(AttrDict( 136 | progress=progress, params=params, flat_params=flatten_dict(params))) 137 | except IOError as e: 138 | if verbose: 139 | print(e) 140 | 141 | # a dictionary of all keys and types of values 142 | all_keys = dict() 143 | for data in exps_data: 144 | for key in data.flat_params.keys(): 145 | if key not in all_keys: 146 | all_keys[key] = type(data.flat_params[key]) 147 | 148 | # if any data does not have some key, specify the value of it 149 | if not ignore_missing_keys: 150 | default_values = dict() 151 | for data in exps_data: 152 | for key in sorted(all_keys.keys()): 153 | if key not in data.flat_params: 154 | if key not in default_values: 155 | default = None 156 | default_values[key] = default 157 | data.flat_params[key] = default_values[key] 158 | 159 | return exps_data 160 | 161 | 162 | def smart_repr(x): 163 | if isinstance(x, tuple): 164 | if len(x) == 0: 165 | return "tuple()" 166 | elif len(x) == 1: 167 | return "(%s,)" % smart_repr(x[0]) 168 | else: 169 | return "(" + ",".join(map(smart_repr, x)) + ")" 170 | else: 171 | if hasattr(x, "__call__"): 172 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." 
+ x.__name__) 173 | else: 174 | return repr(x) 175 | 176 | 177 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 178 | try: 179 | stringified_pairs = sorted( 180 | map( 181 | eval, 182 | unique( 183 | flatten( 184 | [ 185 | list( 186 | map( 187 | smart_repr, 188 | list(d.flat_params.items()) 189 | ) 190 | ) 191 | for d in exps_data 192 | ] 193 | ) 194 | ) 195 | ), 196 | key=lambda x: ( 197 | tuple("" if it is None else str(it) for it in x), 198 | ) 199 | ) 200 | except Exception as e: 201 | print(e) 202 | import ipdb 203 | ipdb.set_trace() 204 | proposals = [(k, [x[1] for x in v]) 205 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 206 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 207 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 208 | return filtered 209 | 210 | 211 | class Selector(object): 212 | def __init__(self, exps_data, filters=None, custom_filters=None): 213 | self._exps_data = exps_data 214 | if filters is None: 215 | self._filters = tuple() 216 | else: 217 | self._filters = tuple(filters) 218 | if custom_filters is None: 219 | self._custom_filters = [] 220 | else: 221 | self._custom_filters = custom_filters 222 | 223 | def where(self, k, v): 224 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 225 | 226 | def custom_filter(self, filter): 227 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 228 | 229 | def _check_exp(self, exp): 230 | # or exp.flat_params.get(k, None) is None 231 | return all( 232 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 233 | k not in exp.flat_params)) for k, v in self._filters) 234 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 235 | 236 | def extract(self): 237 | return list(filter(self._check_exp, self._exps_data)) 238 | 239 | def iextract(self): 240 | return filter(self._check_exp, self._exps_data) 241 | 242 | 243 | # Taken from plot.ly 244 | color_defaults = [ 245 | '#1f77b4', # muted blue 246 | '#ff7f0e', # safety orange 247 | '#2ca02c', # cooked asparagus green 248 | '#d62728', # brick red 249 | '#9467bd', # muted purple 250 | '#8c564b', # chestnut brown 251 | '#e377c2', # raspberry yogurt pink 252 | '#7f7f7f', # middle gray 253 | '#bcbd22', # curry yellow-green 254 | '#17becf' # blue-teal 255 | ] 256 | 257 | 258 | def hex_to_rgb(hex, opacity=1.0): 259 | if hex[0] == '#': 260 | hex = hex[1:] 261 | assert (len(hex) == 6) 262 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 263 | -------------------------------------------------------------------------------- /lab4/viskit/static/css/dropdowns-enhancement.css: -------------------------------------------------------------------------------- 1 | .dropdown-menu > li > label { 2 | display: block; 3 | padding: 3px 20px; 4 | clear: both; 5 | font-weight: normal; 6 | line-height: 1.42857143; 7 | color: #333333; 8 | white-space: nowrap; 9 | } 10 | .dropdown-menu > li > label:hover, 11 | .dropdown-menu > li > label:focus { 12 | text-decoration: none; 13 | color: #262626; 14 | background-color: #f5f5f5; 15 | } 16 | .dropdown-menu > li > input:checked ~ label, 17 | .dropdown-menu > li > input:checked ~ label:hover, 18 | .dropdown-menu > li > input:checked ~ label:focus, 19 | .dropdown-menu > .active > label, 20 | .dropdown-menu > .active > label:hover, 21 | .dropdown-menu > .active > label:focus { 22 | color: #ffffff; 23 | text-decoration: none; 
24 | outline: 0; 25 | background-color: #428bca; 26 | } 27 | .dropdown-menu > li > input[disabled] ~ label, 28 | .dropdown-menu > li > input[disabled] ~ label:hover, 29 | .dropdown-menu > li > input[disabled] ~ label:focus, 30 | .dropdown-menu > .disabled > label, 31 | .dropdown-menu > .disabled > label:hover, 32 | .dropdown-menu > .disabled > label:focus { 33 | color: #999999; 34 | } 35 | .dropdown-menu > li > input[disabled] ~ label:hover, 36 | .dropdown-menu > li > input[disabled] ~ label:focus, 37 | .dropdown-menu > .disabled > label:hover, 38 | .dropdown-menu > .disabled > label:focus { 39 | text-decoration: none; 40 | background-color: transparent; 41 | background-image: none; 42 | filter: progid:DXImageTransform.Microsoft.gradient(enabled = false); 43 | cursor: not-allowed; 44 | } 45 | .dropdown-menu > li > label { 46 | margin-bottom: 0; 47 | cursor: pointer; 48 | } 49 | .dropdown-menu > li > input[type="radio"], 50 | .dropdown-menu > li > input[type="checkbox"] { 51 | display: none; 52 | position: absolute; 53 | top: -9999em; 54 | left: -9999em; 55 | } 56 | .dropdown-menu > li > label:focus, 57 | .dropdown-menu > li > input:focus ~ label { 58 | outline: thin dotted; 59 | outline: 5px auto -webkit-focus-ring-color; 60 | outline-offset: -2px; 61 | } 62 | .dropdown-menu.pull-right { 63 | right: 0; 64 | left: auto; 65 | } 66 | .dropdown-menu.pull-top { 67 | bottom: 100%; 68 | top: auto; 69 | margin: 0 0 2px; 70 | -webkit-box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 71 | box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 72 | } 73 | .dropdown-menu.pull-center { 74 | right: 50%; 75 | left: auto; 76 | } 77 | .dropdown-menu.pull-middle { 78 | right: 100%; 79 | margin: 0 2px 0 0; 80 | box-shadow: -5px 0 10px rgba(0, 0, 0, 0.2); 81 | left: auto; 82 | } 83 | .dropdown-menu.pull-middle.pull-right { 84 | right: auto; 85 | left: 100%; 86 | margin: 0 0 0 2px; 87 | box-shadow: 5px 0 10px rgba(0, 0, 0, 0.2); 88 | } 89 | .dropdown-menu.pull-middle.pull-center { 90 | right: 50%; 91 | margin: 0; 92 | box-shadow: 0 0 10px rgba(0, 0, 0, 0.2); 93 | } 94 | .dropdown-menu.bullet { 95 | margin-top: 8px; 96 | } 97 | .dropdown-menu.bullet:before { 98 | width: 0; 99 | height: 0; 100 | content: ''; 101 | display: inline-block; 102 | position: absolute; 103 | border-color: transparent; 104 | border-style: solid; 105 | -webkit-transform: rotate(360deg); 106 | border-width: 0 7px 7px; 107 | border-bottom-color: #cccccc; 108 | border-bottom-color: rgba(0, 0, 0, 0.15); 109 | top: -7px; 110 | left: 9px; 111 | } 112 | .dropdown-menu.bullet:after { 113 | width: 0; 114 | height: 0; 115 | content: ''; 116 | display: inline-block; 117 | position: absolute; 118 | border-color: transparent; 119 | border-style: solid; 120 | -webkit-transform: rotate(360deg); 121 | border-width: 0 6px 6px; 122 | border-bottom-color: #ffffff; 123 | top: -6px; 124 | left: 10px; 125 | } 126 | .dropdown-menu.bullet.pull-right:before { 127 | left: auto; 128 | right: 9px; 129 | } 130 | .dropdown-menu.bullet.pull-right:after { 131 | left: auto; 132 | right: 10px; 133 | } 134 | .dropdown-menu.bullet.pull-top { 135 | margin-top: 0; 136 | margin-bottom: 8px; 137 | } 138 | .dropdown-menu.bullet.pull-top:before { 139 | top: auto; 140 | bottom: -7px; 141 | border-bottom-width: 0; 142 | border-top-width: 7px; 143 | border-top-color: #cccccc; 144 | border-top-color: rgba(0, 0, 0, 0.15); 145 | } 146 | .dropdown-menu.bullet.pull-top:after { 147 | top: auto; 148 | bottom: -6px; 149 | border-bottom: none; 150 | border-top-width: 6px; 151 | border-top-color: 
#ffffff; 152 | } 153 | .dropdown-menu.bullet.pull-center:before { 154 | left: auto; 155 | right: 50%; 156 | margin-right: -7px; 157 | } 158 | .dropdown-menu.bullet.pull-center:after { 159 | left: auto; 160 | right: 50%; 161 | margin-right: -6px; 162 | } 163 | .dropdown-menu.bullet.pull-middle { 164 | margin-right: 8px; 165 | } 166 | .dropdown-menu.bullet.pull-middle:before { 167 | top: 50%; 168 | left: 100%; 169 | right: auto; 170 | margin-top: -7px; 171 | border-right-width: 0; 172 | border-bottom-color: transparent; 173 | border-top-width: 7px; 174 | border-left-color: #cccccc; 175 | border-left-color: rgba(0, 0, 0, 0.15); 176 | } 177 | .dropdown-menu.bullet.pull-middle:after { 178 | top: 50%; 179 | left: 100%; 180 | right: auto; 181 | margin-top: -6px; 182 | border-right-width: 0; 183 | border-bottom-color: transparent; 184 | border-top-width: 6px; 185 | border-left-color: #ffffff; 186 | } 187 | .dropdown-menu.bullet.pull-middle.pull-right { 188 | margin-right: 0; 189 | margin-left: 8px; 190 | } 191 | .dropdown-menu.bullet.pull-middle.pull-right:before { 192 | left: -7px; 193 | border-left-width: 0; 194 | border-right-width: 7px; 195 | border-right-color: #cccccc; 196 | border-right-color: rgba(0, 0, 0, 0.15); 197 | } 198 | .dropdown-menu.bullet.pull-middle.pull-right:after { 199 | left: -6px; 200 | border-left-width: 0; 201 | border-right-width: 6px; 202 | border-right-color: #ffffff; 203 | } 204 | .dropdown-menu.bullet.pull-middle.pull-center { 205 | margin-left: 0; 206 | margin-right: 0; 207 | } 208 | .dropdown-menu.bullet.pull-middle.pull-center:before { 209 | border: none; 210 | display: none; 211 | } 212 | .dropdown-menu.bullet.pull-middle.pull-center:after { 213 | border: none; 214 | display: none; 215 | } 216 | .dropdown-submenu { 217 | position: relative; 218 | } 219 | .dropdown-submenu > .dropdown-menu { 220 | top: 0; 221 | left: 100%; 222 | margin-top: -6px; 223 | margin-left: -1px; 224 | border-top-left-radius: 0; 225 | } 226 | .dropdown-submenu > a:before { 227 | display: block; 228 | float: right; 229 | width: 0; 230 | height: 0; 231 | content: ""; 232 | margin-top: 6px; 233 | margin-right: -8px; 234 | border-width: 4px 0 4px 4px; 235 | border-style: solid; 236 | border-left-style: dashed; 237 | border-top-color: transparent; 238 | border-bottom-color: transparent; 239 | } 240 | @media (max-width: 767px) { 241 | .navbar-nav .dropdown-submenu > a:before { 242 | margin-top: 8px; 243 | border-color: inherit; 244 | border-style: solid; 245 | border-width: 4px 4px 0; 246 | border-left-color: transparent; 247 | border-right-color: transparent; 248 | } 249 | .navbar-nav .dropdown-submenu > a { 250 | padding-left: 40px; 251 | } 252 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > a, 253 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > label { 254 | padding-left: 35px; 255 | } 256 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > a, 257 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > label { 258 | padding-left: 45px; 259 | } 260 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 261 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 262 | padding-left: 55px; 263 | } 264 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu 
> li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 265 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 266 | padding-left: 65px; 267 | } 268 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 269 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 270 | padding-left: 75px; 271 | } 272 | } 273 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 274 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 275 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 276 | background-color: #e7e7e7; 277 | color: #555555; 278 | } 279 | @media (max-width: 767px) { 280 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 281 | border-top-color: #555555; 282 | } 283 | } 284 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 285 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 286 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 287 | background-color: #080808; 288 | color: #ffffff; 289 | } 290 | @media (max-width: 767px) { 291 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 292 | border-top-color: #ffffff; 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /lab4/viskit/static/js/dropdowns-enhancement.js: -------------------------------------------------------------------------------- 1 | /* ======================================================================== 2 | * Bootstrap Dropdowns Enhancement: dropdowns-enhancement.js v3.1.1 (Beta 1) 3 | * http://behigh.github.io/bootstrap_dropdowns_enhancement/ 4 | * ======================================================================== 5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) 6 | * ======================================================================== */ 7 | 8 | (function($) { 9 | "use strict"; 10 | 11 | var toggle = '[data-toggle="dropdown"]', 12 | disabled = '.disabled, :disabled', 13 | backdrop = '.dropdown-backdrop', 14 | menuClass = 'dropdown-menu', 15 | subMenuClass = 'dropdown-submenu', 16 | namespace = '.bs.dropdown.data-api', 17 | eventNamespace = '.bs.dropdown', 18 | openClass = 'open', 19 | touchSupport = 'ontouchstart' in document.documentElement, 20 | opened; 21 | 22 | 23 | function Dropdown(element) { 24 | $(element).on('click' + eventNamespace, this.toggle) 25 | } 26 | 27 | var proto = Dropdown.prototype; 28 | 29 | proto.toggle = function(event) { 30 | var $element = $(this); 31 | 32 | if ($element.is(disabled)) return; 33 | 34 | var $parent = getParent($element); 35 | var isActive = $parent.hasClass(openClass); 36 | var isSubMenu = $parent.hasClass(subMenuClass); 37 | var menuTree = isSubMenu ? 
getSubMenuParents($parent) : null; 38 | 39 | closeOpened(event, menuTree); 40 | 41 | if (!isActive) { 42 | if (!menuTree) 43 | menuTree = [$parent]; 44 | 45 | if (touchSupport && !$parent.closest('.navbar-nav').length && !menuTree[0].find(backdrop).length) { 46 | // if mobile we use a backdrop because click events don't delegate 47 | $('<div class="dropdown-backdrop"/>
').appendTo(menuTree[0]).on('click', closeOpened) 48 | } 49 | 50 | for (var i = 0, s = menuTree.length; i < s; i++) { 51 | if (!menuTree[i].hasClass(openClass)) { 52 | menuTree[i].addClass(openClass); 53 | positioning(menuTree[i].children('.' + menuClass), menuTree[i]); 54 | } 55 | } 56 | opened = menuTree[0]; 57 | } 58 | 59 | return false; 60 | }; 61 | 62 | proto.keydown = function (e) { 63 | if (!/(38|40|27)/.test(e.keyCode)) return; 64 | 65 | var $this = $(this); 66 | 67 | e.preventDefault(); 68 | e.stopPropagation(); 69 | 70 | if ($this.is('.disabled, :disabled')) return; 71 | 72 | var $parent = getParent($this); 73 | var isActive = $parent.hasClass('open'); 74 | 75 | if (!isActive || (isActive && e.keyCode == 27)) { 76 | if (e.which == 27) $parent.find(toggle).trigger('focus'); 77 | return $this.trigger('click') 78 | } 79 | 80 | var desc = ' li:not(.divider):visible a'; 81 | var desc1 = 'li:not(.divider):visible > input:not(disabled) ~ label'; 82 | var $items = $parent.find(desc1 + ', ' + '[role="menu"]' + desc + ', [role="listbox"]' + desc); 83 | 84 | if (!$items.length) return; 85 | 86 | var index = $items.index($items.filter(':focus')); 87 | 88 | if (e.keyCode == 38 && index > 0) index--; // up 89 | if (e.keyCode == 40 && index < $items.length - 1) index++; // down 90 | if (!~index) index = 0; 91 | 92 | $items.eq(index).trigger('focus') 93 | }; 94 | 95 | proto.change = function (e) { 96 | 97 | var 98 | $parent, 99 | $menu, 100 | $toggle, 101 | selector, 102 | text = '', 103 | $items; 104 | 105 | $menu = $(this).closest('.' + menuClass); 106 | 107 | $toggle = $menu.parent().find('[data-label-placement]'); 108 | 109 | if (!$toggle || !$toggle.length) { 110 | $toggle = $menu.parent().find(toggle); 111 | } 112 | 113 | if (!$toggle || !$toggle.length || $toggle.data('placeholder') === false) 114 | return; // do nothing, no control 115 | 116 | ($toggle.data('placeholder') == undefined && $toggle.data('placeholder', $.trim($toggle.text()))); 117 | text = $.data($toggle[0], 'placeholder'); 118 | 119 | $items = $menu.find('li > input:checked'); 120 | 121 | if ($items.length) { 122 | text = []; 123 | $items.each(function () { 124 | var str = $(this).parent().find('label').eq(0), 125 | label = str.find('.data-label'); 126 | 127 | if (label.length) { 128 | var p = $('
<p></p>
'); 129 | p.append(label.clone()); 130 | str = p.html(); 131 | } 132 | else { 133 | str = str.html(); 134 | } 135 | 136 | 137 | str && text.push($.trim(str)); 138 | }); 139 | 140 | text = text.length < 4 ? text.join(', ') : text.length + ' selected'; 141 | } 142 | 143 | var caret = $toggle.find('.caret'); 144 | 145 | $toggle.html(text || ' '); 146 | if (caret.length) 147 | $toggle.append(' ') && caret.appendTo($toggle); 148 | 149 | }; 150 | 151 | function positioning($menu, $control) { 152 | if ($menu.hasClass('pull-center')) { 153 | $menu.css('margin-right', $menu.outerWidth() / -2); 154 | } 155 | 156 | if ($menu.hasClass('pull-middle')) { 157 | $menu.css('margin-top', ($menu.outerHeight() / -2) - ($control.outerHeight() / 2)); 158 | } 159 | } 160 | 161 | function closeOpened(event, menuTree) { 162 | if (opened) { 163 | 164 | if (!menuTree) { 165 | menuTree = [opened]; 166 | } 167 | 168 | var parent; 169 | 170 | if (opened[0] !== menuTree[0][0]) { 171 | parent = opened; 172 | } else { 173 | parent = menuTree[menuTree.length - 1]; 174 | if (parent.parent().hasClass(menuClass)) { 175 | parent = parent.parent(); 176 | } 177 | } 178 | 179 | parent.find('.' + openClass).removeClass(openClass); 180 | 181 | if (parent.hasClass(openClass)) 182 | parent.removeClass(openClass); 183 | 184 | if (parent === opened) { 185 | opened = null; 186 | $(backdrop).remove(); 187 | } 188 | } 189 | } 190 | 191 | function getSubMenuParents($submenu) { 192 | var result = [$submenu]; 193 | var $parent; 194 | while (!$parent || $parent.hasClass(subMenuClass)) { 195 | $parent = ($parent || $submenu).parent(); 196 | if ($parent.hasClass(menuClass)) { 197 | $parent = $parent.parent(); 198 | } 199 | if ($parent.children(toggle)) { 200 | result.unshift($parent); 201 | } 202 | } 203 | return result; 204 | } 205 | 206 | function getParent($this) { 207 | var selector = $this.attr('data-target'); 208 | 209 | if (!selector) { 210 | selector = $this.attr('href'); 211 | selector = selector && /#[A-Za-z]/.test(selector) && selector.replace(/.*(?=#[^\s]*$)/, ''); //strip for ie7 212 | } 213 | 214 | var $parent = selector && $(selector); 215 | 216 | return $parent && $parent.length ? $parent : $this.parent() 217 | } 218 | 219 | // DROPDOWN PLUGIN DEFINITION 220 | // ========================== 221 | 222 | var old = $.fn.dropdown; 223 | 224 | $.fn.dropdown = function (option) { 225 | return this.each(function () { 226 | var $this = $(this); 227 | var data = $this.data('bs.dropdown'); 228 | 229 | if (!data) $this.data('bs.dropdown', (data = new Dropdown(this))); 230 | if (typeof option == 'string') data[option].call($this); 231 | }) 232 | }; 233 | 234 | $.fn.dropdown.Constructor = Dropdown; 235 | 236 | $.fn.dropdown.clearMenus = function(e) { 237 | $(backdrop).remove(); 238 | $('.' 
+ openClass + ' ' + toggle).each(function () { 239 | var $parent = getParent($(this)); 240 | var relatedTarget = { relatedTarget: this }; 241 | if (!$parent.hasClass('open')) return; 242 | $parent.trigger(e = $.Event('hide' + eventNamespace, relatedTarget)); 243 | if (e.isDefaultPrevented()) return; 244 | $parent.removeClass('open').trigger('hidden' + eventNamespace, relatedTarget); 245 | }); 246 | return this; 247 | }; 248 | 249 | 250 | // DROPDOWN NO CONFLICT 251 | // ==================== 252 | 253 | $.fn.dropdown.noConflict = function () { 254 | $.fn.dropdown = old; 255 | return this 256 | }; 257 | 258 | 259 | $(document).off(namespace) 260 | .on('click' + namespace, closeOpened) 261 | .on('click' + namespace, toggle, proto.toggle) 262 | .on('click' + namespace, '.dropdown-menu > li > input[type="checkbox"] ~ label, .dropdown-menu > li > input[type="checkbox"], .dropdown-menu.noclose > li', function (e) { 263 | e.stopPropagation() 264 | }) 265 | .on('change' + namespace, '.dropdown-menu > li > input[type="checkbox"], .dropdown-menu > li > input[type="radio"]', proto.change) 266 | .on('keydown' + namespace, toggle + ', [role="menu"], [role="listbox"]', proto.keydown) 267 | }(jQuery)); -------------------------------------------------------------------------------- /prelab/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 
17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /prelab/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /prelab/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | dependencies: 3 | - python==3.5.3 4 | - numpy==1.13.1 5 | - notebook==5.0.0 6 | - pip: 7 | - gym==0.9.2 8 | - chainer==2.0.1 9 | - matplotlib==2.0.2 10 | -------------------------------------------------------------------------------- /prelab/findport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Usage: findport.py 3000 100 4 | # 5 | 6 | """ 7 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 8 | 9 | Copyright 2017 Deep RL Bootcamp Organizers. 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | 17 | """ 18 | 19 | 20 | from __future__ import print_function 21 | import socket 22 | from contextlib import closing 23 | import sys 24 | 25 | if len(sys.argv) != 3: 26 | print("Usage: {} ".format(sys.argv[0])) 27 | sys.exit(1) 28 | 29 | base = int(sys.argv[1]) 30 | increment = int(sys.argv[2]) 31 | 32 | 33 | def find_free_port(): 34 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 35 | for port in range(base, 65536, increment): 36 | try: 37 | s.bind(('', port)) 38 | return s.getsockname()[1] 39 | except socket.error: 40 | continue 41 | 42 | 43 | print(find_free_port()) 44 | -------------------------------------------------------------------------------- /prelab/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /prelab/prelab.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inoryy/Deep-RL-Bootcamp-Labs/afc58ea83777419d290c5495ce167b7aa79ea04a/prelab/prelab.pdf -------------------------------------------------------------------------------- /prelab/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 
36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /prelab/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | def main(): 17 | import roboschool 18 | import gym 19 | import chainer 20 | env = gym.make('CartPole-v0') 21 | env.reset() 22 | env.step(env.action_space.sample()) 23 | env = gym.make('RoboschoolHalfCheetah-v1') 24 | env.reset() 25 | env.step(env.action_space.sample()) 26 | print("Your environment has been successfully set up!") 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /prelab/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | 16 | from gym import Env 17 | from gym.envs.registration import register 18 | from gym.utils import seeding 19 | from gym import spaces 20 | from gym.envs.classic_control.cartpole import CartPoleEnv 21 | import numpy as np 22 | 23 | 24 | class PointEnv(Env): 25 | metadata = { 26 | 'render.modes': ['human', 'rgb_array'], 27 | 'video.frames_per_second': 50 28 | } 29 | 30 | def __init__(self): 31 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 32 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 33 | 34 | self._seed() 35 | self.viewer = None 36 | self.state = None 37 | 38 | def _seed(self, seed=None): 39 | self.np_random, seed = seeding.np_random(seed) 40 | return [seed] 41 | 42 | def _step(self, action): 43 | action = np.clip(action, -0.025, 0.025) 44 | self.state = np.clip(self.state + action, -1, 1) 45 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 46 | 47 | def _reset(self): 48 | while True: 49 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 50 | # Sample states that are far away 51 | if np.linalg.norm(self.state) > 0.9: 52 | break 53 | return np.array(self.state) 54 | 55 | # def _render(self, mode='human', close=False): 56 | # pass 57 | 58 | def _render(self, mode='human', close=False): 59 | if close: 60 | if self.viewer is not None: 61 | self.viewer.close() 62 | self.viewer = None 63 | return 64 | 65 | screen_width = 800 66 | screen_height = 800 67 | 68 | if self.viewer is None: 69 | from gym.envs.classic_control import rendering 70 | self.viewer = rendering.Viewer(screen_width, screen_height) 71 | 72 | agent = rendering.make_circle( 73 | min(screen_height, screen_width) * 0.03) 74 | origin = rendering.make_circle( 75 | min(screen_height, screen_width) * 0.03) 76 | trans = rendering.Transform(translation=(0, 0)) 77 | agent.add_attr(trans) 78 | self.trans = trans 79 | agent.set_color(1, 0, 0) 80 | origin.set_color(0, 0, 0) 81 | origin.add_attr(rendering.Transform( 82 | translation=(screen_width // 2, screen_height // 2))) 83 | self.viewer.add_geom(agent) 84 | self.viewer.add_geom(origin) 85 | 86 | # self.trans.set_translation(0, 0) 87 | self.trans.set_translation( 88 | (self.state[0] + 1) / 2 * screen_width, 89 | (self.state[1] + 1) / 2 * screen_height, 90 | ) 91 | 92 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 93 | 94 | 95 | register( 96 | 'Point-v0', 97 | entry_point='simplepg.point_env:PointEnv', 98 | timestep_limit=40, 99 | ) 100 | -------------------------------------------------------------------------------- /prelab/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 5 | 6 | Copyright 2017 Deep RL Bootcamp Organizers. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | 14 | """ 15 | 16 | import click 17 | import numpy as np 18 | import gym 19 | 20 | from simplepg.simple_utils import include_bias, weighted_sample 21 | 22 | 23 | def point_get_action(theta, ob, rng=np.random): 24 | ob_1 = include_bias(ob) 25 | mean = theta.dot(ob_1) 26 | return rng.normal(loc=mean, scale=1.) 27 | 28 | 29 | def cartpole_get_action(theta, ob, rng=np.random): 30 | ob_1 = include_bias(ob) 31 | logits = ob_1.dot(theta.T) 32 | return weighted_sample(logits, rng=rng) 33 | 34 | 35 | @click.command() 36 | @click.argument("env_id", type=str, default="Point-v0") 37 | def main(env_id): 38 | # Register the environment 39 | rng = np.random.RandomState(42) 40 | 41 | if env_id == 'CartPole-v0': 42 | env = gym.make('CartPole-v0') 43 | get_action = cartpole_get_action 44 | obs_dim = env.observation_space.shape[0] 45 | action_dim = env.action_space.n 46 | elif env_id == 'Point-v0': 47 | from simplepg import point_env 48 | env = gym.make('Point-v0') 49 | get_action = point_get_action 50 | obs_dim = env.observation_space.shape[0] 51 | action_dim = env.action_space.shape[0] 52 | else: 53 | raise ValueError( 54 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 55 | 56 | env.seed(42) 57 | 58 | # Initialize parameters 59 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 60 | 61 | while True: 62 | ob = env.reset() 63 | done = False 64 | # Only render the first trajectory 65 | # Collect a new trajectory 66 | rewards = [] 67 | while not done: 68 | action = get_action(theta, ob, rng=rng) 69 | next_ob, rew, done, _ = env.step(action) 70 | ob = next_ob 71 | env.render() 72 | rewards.append(rew) 73 | 74 | print("Episode reward: %.2f" % np.sum(rewards)) 75 | 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /prelab/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project was developed by Rocky Duan, Peter Chen, Pieter Abbeel for the Berkeley Deep RL Bootcamp, August 2017. Bootcamp website with slides and lecture videos: https://sites.google.com/view/deep-rl-bootcamp/. 3 | 4 | Copyright 2017 Deep RL Bootcamp Organizers. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | 14 | 15 | 16 | import numpy as np 17 | import scipy.special 18 | import chainer 19 | 20 | 21 | # Compute gradient approximately using finite difference 22 | def numerical_grad(f, x, eps=1e-8): 23 | grad = np.zeros_like(x) 24 | for i in range(len(x)): 25 | xplus = np.array(x) 26 | xplus[i] += eps 27 | fplus = f(xplus) 28 | xminus = np.array(x) 29 | xminus[i] -= eps 30 | fminus = f(xminus) 31 | grad[i] = (fplus - fminus) / (2 * eps) 32 | return grad 33 | 34 | 35 | def gradient_check(f, g, x): 36 | # Test the implementation of g(x) = df/dx 37 | # Perform numerical differentiation and test it 38 | g_num = numerical_grad(f, x) 39 | g_test = g(x) 40 | try: 41 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 42 | print("Gradient check passed!") 43 | except AssertionError as e: 44 | print(e) 45 | print("Error: Gradient check didn't pass!") 46 | exit() 47 | 48 | 49 | def log_softmax(logits): 50 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 51 | 52 | 53 | def softmax(logits): 54 | x = logits 55 | x = x - np.max(x, axis=-1, keepdims=True) 56 | x = np.exp(x) 57 | return x / np.sum(x, axis=-1, keepdims=True) 58 | 59 | 60 | def weighted_sample(logits, rng=np.random): 61 | weights = softmax(logits) 62 | return min( 63 | int(np.sum(rng.uniform() > np.cumsum(weights))), 64 | len(weights) - 1 65 | ) 66 | 67 | 68 | def include_bias(x): 69 | # Add a constant term (1.0) to each entry in x 70 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 71 | 72 | 73 | _tested = set() 74 | _tests = dict() 75 | 76 | nprs = np.random.RandomState 77 | 78 | 79 | def register_test(fn_name, kwargs, desired_output=None): 80 | assert fn_name not in _tests 81 | _tests[fn_name] = (kwargs, desired_output) 82 | 83 | 84 | def assert_allclose(a, b): 85 | if isinstance(a, (np.ndarray, float, int)): 86 | np.testing.assert_allclose(a, b) 87 | elif isinstance(a, (tuple, list)): 88 | assert isinstance(b, (tuple, list)) 89 | assert len(a) == len(b) 90 | for a_i, b_i in zip(a, b): 91 | assert_allclose(a_i, b_i) 92 | elif isinstance(a, chainer.Variable): 93 | assert isinstance(b, chainer.Variable) 94 | assert_allclose(a.data, b.data) 95 | else: 96 | raise NotImplementedError 97 | 98 | 99 | def test_once(fn): 100 | module = fn.__module__ 101 | name = fn.__name__ 102 | key = module + "." + name 103 | if key in _tested: 104 | return 105 | assert key in _tests, "Test for %s not found!" 
% key 106 | kwargs, desired_output = _tests[key] 107 | _tested.add(key) 108 | 109 | if callable(kwargs): 110 | kwargs = kwargs() 111 | 112 | if callable(desired_output): 113 | desired_output = desired_output() 114 | 115 | if desired_output is None: 116 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 117 | exit() 118 | else: 119 | try: 120 | output = fn(**kwargs) 121 | assert_allclose(desired_output, output) 122 | print("Test for %s passed!" % key) 123 | except AssertionError as e: 124 | print(e) 125 | print("Error: test for %s didn't pass!" % key) 126 | exit() 127 | --------------------------------------------------------------------------------
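A note on the prelab policy code above: prelab/simplepg/rollout.py builds both of its policies from the helpers in prelab/simplepg/simple_utils.py. The sketch below is not part of the original labs; the observation values are made up for illustration, and it assumes simplepg is importable (e.g. when run from the prelab directory inside the provided Docker image). It shows how include_bias, softmax, and weighted_sample combine into the discrete CartPole policy and the Gaussian Point-v0 policy.

import numpy as np

from simplepg.simple_utils import include_bias, softmax, weighted_sample

rng = np.random.RandomState(0)

# Discrete CartPole-style policy: a linear map from the bias-augmented
# observation to one logit per action, sampled through a softmax
# (this is what cartpole_get_action in rollout.py does).
obs_dim, action_dim = 4, 2
theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1))

ob = np.array([0.01, -0.02, 0.03, 0.04])   # made-up CartPole observation
ob_1 = include_bias(ob)                    # append a constant 1.0 feature
logits = ob_1.dot(theta.T)                 # one logit per action
action = weighted_sample(logits, rng=rng)  # index drawn from softmax(logits)
print("discrete action:", action, "action probabilities:", softmax(logits))

# Continuous Point-v0-style policy: the same linear map gives the mean of a
# unit-variance Gaussian over the 2-D action (point_get_action in rollout.py).
theta_pt = rng.normal(scale=0.01, size=(2, 2 + 1))
ob_pt = np.array([0.5, -0.7])              # made-up Point-v0 observation
mean = theta_pt.dot(include_bias(ob_pt))
action_pt = rng.normal(loc=mean, scale=1.0)
print("continuous action:", action_pt)

Folding the bias into theta as an extra column is why theta has shape (action_dim, obs_dim + 1) in rollout.py.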
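The utilities at the end of prelab/simplepg/simple_utils.py (numerical_grad, gradient_check, register_test, test_once) are how the lab exercises check implementations against reference outputs. Below is a minimal illustration of the intended flow, again assuming the prelab environment; square_sum, its gradient, and the registered inputs are invented for this example and do not appear in the labs.

import numpy as np

from simplepg.simple_utils import gradient_check, register_test, test_once

# A toy function and its analytic gradient, invented for illustration.
def square_sum(x):
    return float(np.sum(x ** 2))

def square_sum_grad(x):
    return 2.0 * x

# gradient_check compares the analytic gradient against the central-difference
# estimate computed by numerical_grad and prints "Gradient check passed!".
gradient_check(square_sum, square_sum_grad, np.array([1.0, -2.0, 3.0]))

# register_test stores the expected output under the module-qualified function
# name (when this file is run as a script, __module__ is "__main__"); test_once
# then calls the function with the registered kwargs and compares the result
# via assert_allclose.
register_test(
    "__main__.square_sum",
    kwargs=dict(x=np.array([1.0, 2.0])),
    desired_output=5.0,
)
test_once(square_sum)  # prints "Test for __main__.square_sum passed!"; later calls are no-ops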