├── README.md ├── labs ├── lab1and2.md ├── lab1and2 │ ├── .ipynb_checkpoints │ │ ├── Lab 1 - Problem 1-checkpoint.ipynb │ │ └── Lab 1 - Problem 2-checkpoint.ipynb │ ├── Lab 1 - Problem 1.ipynb │ ├── Lab 1 - Problem 2.ipynb │ ├── Lab 1 - Problem 3.ipynb │ ├── Lab 2.ipynb │ ├── crawler_env.py │ ├── discrete_env.py │ ├── environment.yml │ ├── frozen_lake.py │ └── misc.py ├── lab3.pdf ├── lab3 │ ├── docker_run.sh │ ├── docker_run_vnc.sh │ ├── environment.yml │ ├── findport.sh │ ├── launch_bg_screen_buffer.sh │ ├── logger.py │ ├── scripts │ │ ├── setup_xquartz.sh │ │ └── test_environment_setup.py │ ├── simpledqn │ │ ├── __init__.py │ │ ├── gridworld_env.py │ │ ├── main.py │ │ ├── replay_buffer.py │ │ ├── replay_buffer_warm_start.pkl │ │ ├── simple_utils.py │ │ ├── weights_warm_start.pkl │ │ └── wrappers.py │ └── viskit │ │ ├── __init__.py │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html ├── lab4.pdf ├── lab4 │ ├── a2c.py │ ├── alg_utils.py │ ├── algs.py │ ├── docker_run.sh │ ├── docker_run_vnc.sh │ ├── env_makers.py │ ├── environment.yml │ ├── experiments │ │ ├── run_a2c_breakout.py │ │ ├── run_a2c_pong.py │ │ ├── run_a2c_pong_warm_start.py │ │ ├── run_pg_cartpole.py │ │ ├── run_trpo_cartpole.py │ │ ├── run_trpo_half_cheetah.py │ │ └── run_trpo_pendulum.py │ ├── findport.py │ ├── launch_bg_screen_buffer.sh │ ├── logger.py │ ├── models.py │ ├── pg.py │ ├── pong_warm_start.pkl │ ├── scripts │ │ ├── resume_training.py │ │ ├── setup_xquartz.sh │ │ ├── sim_policy.py │ │ └── test_environment_setup.py │ ├── simplepg │ │ ├── __pycache__ │ │ │ ├── point_env.cpython-35.pyc │ │ │ └── simple_utils.cpython-35.pyc │ │ ├── main.py │ │ ├── point_env.py │ │ ├── rollout.py │ │ └── simple_utils.py │ ├── tests │ │ ├── __pycache__ │ │ │ ├── a2c_tests.cpython-35.pyc │ │ │ ├── pg_tests.cpython-35.pyc │ │ │ ├── simplepg_tests.cpython-35.pyc │ │ │ └── trpo_tests.cpython-35.pyc │ │ ├── a2c_tests.py │ │ ├── pg_tests.py │ │ ├── simplepg_tests.py │ │ └── trpo_tests.py │ ├── trpo.py │ ├── utils.py │ └── viskit │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── core.cpython-35.pyc │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html ├── lab5.pdf ├── lab5 │ ├── a2c.py │ ├── alg_utils.py │ ├── algs.py │ ├── cloudexec.py │ ├── cloudexec.yml.template │ ├── docker_run.sh │ ├── docker_run_vnc.sh │ ├── env_makers.py │ ├── environment.yml │ ├── experiments │ │ ├── run_a2c_breakout.py │ │ ├── run_a2c_pong.py │ │ ├── run_cloud_trpo_cartpole.py │ │ ├── run_cloud_trpo_pendulum_baseline.py │ │ ├── run_pg_cartpole.py │ │ ├── run_trpo_cartpole.py │ │ ├── run_trpo_half_cheetah.py │ │ └── run_trpo_pendulum.py │ ├── findport.py │ ├── launch_bg_screen_buffer.sh │ ├── logger.py │ ├── models.py │ ├── pg.py │ ├── scripts │ │ ├── ec2ctl.py │ │ ├── generate_key_pairs.py │ │ ├── resume_training.py │ │ ├── setup_xquartz.sh │ │ ├── sim_policy.py │ │ ├── sync_s3.py │ │ ├── test_ec2_setup.py │ │ └── test_environment_setup.py │ ├── simplepg │ │ ├── main.py │ │ ├── 
point_env.py │ │ ├── rollout.py │ │ └── simple_utils.py │ ├── tests │ │ ├── a2c_tests.py │ │ ├── pg_tests.py │ │ ├── simplepg_tests.py │ │ └── trpo_tests.py │ ├── trpo.py │ ├── utils.py │ └── viskit │ │ ├── __init__.py │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html ├── setup.pdf └── setup │ ├── docker_run.sh │ ├── docker_run_vnc.sh │ ├── environment.yml │ ├── findport.py │ ├── launch_bg_screen_buffer.sh │ ├── scripts │ ├── setup_xquartz.sh │ └── test_environment_setup.py │ └── simplepg │ ├── __pycache__ │ ├── point_env.cpython-35.pyc │ └── simple_utils.cpython-35.pyc │ ├── point_env.py │ ├── rollout.py │ └── simple_utils.py └── slides ├── FrontiersPieterAbbeelPeterChenRockyDuan.pdf ├── Lec10aUtilities.pdf ├── Lec10binverseRL.pdf ├── Lec1intromdpsexactmethods.pdf ├── Lec2samplingbasedapproximationsandfunctionfitting.pdf ├── Lec3DQN.pdf ├── Lec4apolicygradientsactorcritic.pdf ├── Lec4b_Pong_from_Pixels.pdf ├── Lec5advancedpolicygradientmethods.pdf ├── Lec6nutsandboltsdeeprlresearch.pdf ├── Lec7deeprlbootcampsvgscg.pdf ├── Lec8derivativefree.pdf ├── Lec9modelbaseddeeprl.pdf └── TAintros.pdf /README.md: -------------------------------------------------------------------------------- 1 | # Deep Reinforcement Learning Bootcamp @ UCBerkeley 2017 2 | 3 | That was a wonderful experience! Awesome people and lots of learning. 4 | 5 | **Note:** Slides and videos are now officially available at the [bootcamp webpage](https://sites.google.com/view/deep-rl-bootcamp/lectures?authuser=0) 6 | 7 | ## Content 8 | 9 | * **labs:** Completed lab lessons + environment setup guide 10 | * **slides:** Slides used by professors for the lectures 11 | -------------------------------------------------------------------------------- /labs/lab1and2.md: -------------------------------------------------------------------------------- 1 | * Activate the conda environment by running 2 | ``` 3 | source activate deeprlbootcamp 4 | ``` 5 | * Launch IPython Notebook from this directory; this should open up a browser window where you can click to open Lab1 and Lab2 6 | ``` 7 | jupyter notebook 8 | ``` 9 | * After opening a lab file, click “File - Trust Notebook” 10 | * If you have never used IPython Notebook before, skim this quick tutorial here: http://cs231n.github.io/ipython-tutorial/ -------------------------------------------------------------------------------- /labs/lab1and2/discrete_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import Env, spaces 4 | from gym.utils import seeding 5 | 6 | def categorical_sample(prob_n, np_random): 7 | """ 8 | Sample from categorical distribution 9 | Each row specifies class probabilities 10 | """ 11 | prob_n = np.asarray(prob_n) 12 | csprob_n = np.cumsum(prob_n) 13 | return (csprob_n > np_random.rand()).argmax() 14 | 15 | 16 | class DiscreteEnv(Env): 17 | 18 | """ 19 | Has the following members 20 | - nS: number of states 21 | - nA: number of actions 22 | - P: transitions (*) 23 | - isd: initial state distribution (**) 24 | 25 | (*) dictionary dict of dicts of lists, where 26 | P[s][a] == [(probability, nextstate, reward, done), ...] 
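      For illustration (hypothetical values): a deterministic transition
      from state 0 under action 1 into state 3 with zero reward would be
      stored as P[0][1] == [(1.0, 3, 0.0, False)]; a stochastic transition
      spreads the probability mass over several such tuples in the list.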
27 | (**) list or array of length nS 28 | 29 | 30 | """ 31 | def __init__(self, nS, nA, P, isd): 32 | self.P = P 33 | self.isd = isd 34 | self.lastaction=None # for rendering 35 | self.nS = nS 36 | self.nA = nA 37 | 38 | self.action_space = spaces.Discrete(self.nA) 39 | self.observation_space = spaces.Discrete(self.nS) 40 | 41 | self._seed() 42 | self._reset() 43 | 44 | def _seed(self, seed=None): 45 | self.np_random, seed = seeding.np_random(seed) 46 | return [seed] 47 | 48 | def _reset(self): 49 | self.s = categorical_sample(self.isd, self.np_random) 50 | self.lastaction=None 51 | return self.s 52 | 53 | def _step(self, a): 54 | transitions = self.P[self.s][a] 55 | i = categorical_sample([t[0] for t in transitions], self.np_random) 56 | p, s, r, d= transitions[i] 57 | self.s = s 58 | self.lastaction=a 59 | return (s, r, d, {"prob" : p}) 60 | -------------------------------------------------------------------------------- /labs/lab1and2/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - notebook 11 | - pip: 12 | - gym==0.9.2 13 | - chainer==2.0.1 14 | - ipdb==0.10.3 15 | - tblib==1.3.2 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | - matplotlib 33 | -------------------------------------------------------------------------------- /labs/lab1and2/frozen_lake.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from six import StringIO, b 4 | 5 | from gym import utils 6 | import discrete_env 7 | 8 | LEFT = 0 9 | DOWN = 1 10 | RIGHT = 2 11 | UP = 3 12 | 13 | MAPS = { 14 | "4x4": [ 15 | "SFFF", 16 | "FHFH", 17 | "FFFH", 18 | "HFFG" 19 | ], 20 | "8x8": [ 21 | "SFFFFFFF", 22 | "FFFFFFFF", 23 | "FFFHFFFF", 24 | "FFFFFHFF", 25 | "FFFHFFFF", 26 | "FHHFFFHF", 27 | "FHFFHFHF", 28 | "FFFHFFFG" 29 | ], 30 | } 31 | 32 | class FrozenLakeEnv(discrete_env.DiscreteEnv): 33 | """ 34 | Winter is here. You and your friends were tossing around a frisbee at the park 35 | when you made a wild throw that left the frisbee out in the middle of the lake. 36 | The water is mostly frozen, but there are a few holes where the ice has melted. 37 | If you step into one of those holes, you'll fall into the freezing water. 38 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 39 | you navigate across the lake and retrieve the disc. 40 | However, the ice is slippery, so you won't always move in the direction you intend. 41 | The surface is described using a grid like the following 42 | 43 | SFFF 44 | FHFH 45 | FFFH 46 | HFFG 47 | 48 | S : starting point, safe 49 | F : frozen surface, safe 50 | H : hole, fall to your doom 51 | G : goal, where the frisbee is located 52 | 53 | The episode ends when you reach the goal or fall in a hole. 54 | You receive a reward of 1 if you reach the goal, and zero otherwise. 
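    To make the slippery dynamics concrete (this matches the transition
    construction in __init__ below): with is_slippery=True, each action moves
    in the intended direction with probability 0.8, and in each of the two
    perpendicular directions with probability 0.1.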
55 | 56 | """ 57 | 58 | metadata = {'render.modes': ['human', 'ansi']} 59 | 60 | def __init__(self, desc=None, map_name="4x4",is_slippery=True): 61 | if desc is None and map_name is None: 62 | raise ValueError('Must provide either desc or map_name') 63 | elif desc is None: 64 | desc = MAPS[map_name] 65 | self.desc = desc = np.asarray(desc,dtype='c') 66 | self.nrow, self.ncol = nrow, ncol = desc.shape 67 | 68 | nA = 4 69 | nS = nrow * ncol 70 | 71 | isd = np.array(desc == b'S').astype('float64').ravel() 72 | isd /= isd.sum() 73 | 74 | P = {s : {a : [] for a in range(nA)} for s in range(nS)} 75 | 76 | def to_s(row, col): 77 | return row*ncol + col 78 | def inc(row, col, a): 79 | if a==0: # left 80 | col = max(col-1,0) 81 | elif a==1: # down 82 | row = min(row+1,nrow-1) 83 | elif a==2: # right 84 | col = min(col+1,ncol-1) 85 | elif a==3: # up 86 | row = max(row-1,0) 87 | return (row, col) 88 | 89 | for row in range(nrow): 90 | for col in range(ncol): 91 | s = to_s(row, col) 92 | for a in range(4): 93 | li = P[s][a] 94 | letter = desc[row, col] 95 | if letter in b'GH': 96 | li.append((1.0, s, 0, True)) 97 | else: 98 | if is_slippery: 99 | for b in [(a-1)%4, a, (a+1)%4]: 100 | newrow, newcol = inc(row, col, b) 101 | newstate = to_s(newrow, newcol) 102 | newletter = desc[newrow, newcol] 103 | done = bytes(newletter) in b'GH' 104 | rew = float(newletter == b'G') 105 | li.append((0.8 if b==a else 0.1, newstate, rew, done)) 106 | else: 107 | newrow, newcol = inc(row, col, a) 108 | newstate = to_s(newrow, newcol) 109 | newletter = desc[newrow, newcol] 110 | done = bytes(newletter) in b'GH' 111 | rew = float(newletter == b'G') 112 | li.append((1.0, newstate, rew, done)) 113 | 114 | super(FrozenLakeEnv, self).__init__(nS, nA, P, isd) 115 | 116 | def _render(self, mode='human', close=False): 117 | if close: 118 | return 119 | outfile = StringIO() if mode == 'ansi' else sys.stdout 120 | 121 | row, col = self.s // self.ncol, self.s % self.ncol 122 | desc = self.desc.tolist() 123 | desc = [[c.decode('utf-8') for c in line] for line in desc] 124 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 125 | if self.lastaction is not None: 126 | outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction])) 127 | else: 128 | outfile.write("\n") 129 | outfile.write("\n".join(''.join(line) for line in desc)+"\n") 130 | 131 | return outfile 132 | -------------------------------------------------------------------------------- /labs/lab1and2/misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from six import StringIO, b 4 | 5 | from gym import utils 6 | import discrete_env 7 | 8 | LEFT = 0 9 | DOWN = 1 10 | RIGHT = 2 11 | UP = 3 12 | 13 | MAPS = { 14 | "4x4": [ 15 | "SFFF", 16 | "FHFH", 17 | "FFFH", 18 | "HFFG" 19 | ], 20 | "8x8": [ 21 | "SFFFFFFF", 22 | "FFFFFFFF", 23 | "FFFHFFFF", 24 | "FFFFFHFF", 25 | "FFFHFFFF", 26 | "FHHFFFHF", 27 | "FHFFHFHF", 28 | "FFFHFFFG" 29 | ], 30 | } 31 | 32 | class FrozenLakeEnv(discrete_env.DiscreteEnv): 33 | """ 34 | Winter is here. You and your friends were tossing around a frisbee at the park 35 | when you made a wild throw that left the frisbee out in the middle of the lake. 36 | The water is mostly frozen, but there are a few holes where the ice has melted. 37 | If you step into one of those holes, you'll fall into the freezing water. 
38 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 39 | you navigate across the lake and retrieve the disc. 40 | However, the ice is slippery, so you won't always move in the direction you intend. 41 | The surface is described using a grid like the following 42 | 43 | SFFF 44 | FHFH 45 | FFFH 46 | HFFG 47 | 48 | S : starting point, safe 49 | F : frozen surface, safe 50 | H : hole, fall to your doom 51 | G : goal, where the frisbee is located 52 | 53 | The episode ends when you reach the goal or fall in a hole. 54 | You receive a reward of 1 if you reach the goal, and zero otherwise. 55 | 56 | """ 57 | 58 | metadata = {'render.modes': ['human', 'ansi']} 59 | 60 | def __init__(self, desc=None, map_name="4x4",is_slippery=True): 61 | if desc is None and map_name is None: 62 | raise ValueError('Must provide either desc or map_name') 63 | elif desc is None: 64 | desc = MAPS[map_name] 65 | self.desc = desc = np.asarray(desc,dtype='c') 66 | self.nrow, self.ncol = nrow, ncol = desc.shape 67 | 68 | nA = 4 69 | nS = nrow * ncol 70 | 71 | isd = np.array(desc == b'S').astype('float64').ravel() 72 | isd /= isd.sum() 73 | 74 | P = {s : {a : [] for a in range(nA)} for s in range(nS)} 75 | 76 | def to_s(row, col): 77 | return row*ncol + col 78 | def inc(row, col, a): 79 | if a==0: # left 80 | col = max(col-1,0) 81 | elif a==1: # down 82 | row = min(row+1,nrow-1) 83 | elif a==2: # right 84 | col = min(col+1,ncol-1) 85 | elif a==3: # up 86 | row = max(row-1,0) 87 | return (row, col) 88 | 89 | for row in range(nrow): 90 | for col in range(ncol): 91 | s = to_s(row, col) 92 | for a in range(4): 93 | li = P[s][a] 94 | letter = desc[row, col] 95 | if letter in b'GH': 96 | li.append((1.0, s, 0, True)) 97 | else: 98 | if is_slippery: 99 | for b in [(a-1)%4, a, (a+1)%4]: 100 | newrow, newcol = inc(row, col, b) 101 | newstate = to_s(newrow, newcol) 102 | newletter = desc[newrow, newcol] 103 | done = bytes(newletter) in b'GH' 104 | rew = float(newletter == b'G') 105 | li.append((0.8 if b==a else 0.1, newstate, rew, done)) 106 | else: 107 | newrow, newcol = inc(row, col, a) 108 | newstate = to_s(newrow, newcol) 109 | newletter = desc[newrow, newcol] 110 | done = bytes(newletter) in b'GH' 111 | rew = float(newletter == b'G') 112 | li.append((1.0, newstate, rew, done)) 113 | 114 | super(FrozenLakeEnv, self).__init__(nS, nA, P, isd) 115 | 116 | def _render(self, mode='human', close=False): 117 | if close: 118 | return 119 | outfile = StringIO() if mode == 'ansi' else sys.stdout 120 | 121 | row, col = self.s // self.ncol, self.s % self.ncol 122 | desc = self.desc.tolist() 123 | desc = [[c.decode('utf-8') for c in line] for line in desc] 124 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 125 | if self.lastaction is not None: 126 | outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction])) 127 | else: 128 | outfile.write("\n") 129 | outfile.write("\n".join(''.join(line) for line in desc)+"\n") 130 | 131 | return outfile 132 | 133 | def make_grader(expected): 134 | boxed_i = [0] 135 | boxed_err = [False] 136 | expected_lines = expected.split("\n") 137 | def checking_print(line): 138 | if boxed_i[0] < len(expected_lines): 139 | expected_line = expected_lines[boxed_i[0]] 140 | else: 141 | expected_line = "[END]" 142 | if expected_line == line: 143 | print(line) 144 | else: 145 | boxed_err[0] = True 146 | print("\x1b[41m", end="") 147 | print(line, end="") 148 | print("\x1b[0m", end="") 149 | print(" *** Expected: \x1b[42m" + 
expected_line + "\x1b[0m") 150 | boxed_i[0] += 1 151 | if boxed_i[0] == len(expected_lines): 152 | print("Test failed" if boxed_err[0] else "Test succeeded") 153 | return checking_print 154 | -------------------------------------------------------------------------------- /labs/lab3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3.pdf -------------------------------------------------------------------------------- /labs/lab3/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.sh" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /labs/lab3/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.sh" 3000 1) 4 | viskit_port=$("$DIR/findport.sh" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /labs/lab3/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - 
cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /labs/lab3/findport.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Please run as root. 4 | # Usage: bash findport.sh 3000 100 5 | # 6 | 7 | 8 | if [[ -z "$1" || -z "$2" ]]; then 9 | echo "Usage: $0 " 10 | exit 1 11 | fi 12 | 13 | 14 | BASE=$1 15 | INCREMENT=$2 16 | 17 | port=$BASE 18 | isfree=$(netstat -aln | grep $port) 19 | 20 | while [[ -n "$isfree" ]]; do 21 | port=$[port+INCREMENT] 22 | isfree=$(netstat -aln | grep $port) 23 | done 24 | 25 | echo "$port" 26 | exit 0 27 | -------------------------------------------------------------------------------- /labs/lab3/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /labs/lab3/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | See README.md for a description of the logging API. 4 | 5 | OFF state corresponds to having Logger.CURRENT == Logger.DEFAULT 6 | ON state is otherwise 7 | 8 | """ 9 | import datetime 10 | from collections import OrderedDict 11 | import os 12 | import sys 13 | import shutil 14 | import os.path as osp 15 | import json 16 | 17 | import dateutil.tz 18 | 19 | LOG_OUTPUT_FORMATS = ['stdout', 'log', 'json'] 20 | 21 | DEBUG = 10 22 | INFO = 20 23 | WARN = 30 24 | ERROR = 40 25 | 26 | DISABLED = 50 27 | 28 | 29 | class OutputFormat(object): 30 | def writekvs(self, kvs): 31 | """ 32 | Write key-value pairs 33 | """ 34 | raise NotImplementedError 35 | 36 | def writeseq(self, args): 37 | """ 38 | Write a sequence of other data (e.g. a logging message) 39 | """ 40 | pass 41 | 42 | def close(self): 43 | return 44 | 45 | 46 | class HumanOutputFormat(OutputFormat): 47 | def __init__(self, file): 48 | self.file = file 49 | 50 | def writekvs(self, kvs): 51 | # Create strings for printing 52 | key2str = OrderedDict() 53 | for (key, val) in kvs.items(): 54 | valstr = '%-8.5g' % (val,) if hasattr(val, '__float__') else val 55 | key2str[self._truncate(key)] = self._truncate(valstr) 56 | 57 | # Find max widths 58 | keywidth = max(map(len, key2str.keys())) 59 | valwidth = max(map(len, key2str.values())) 60 | 61 | # Write out the data 62 | dashes = '-' * (keywidth + valwidth + 7) 63 | lines = [dashes] 64 | for (key, val) in key2str.items(): 65 | lines.append('| %s%s | %s%s |' % ( 66 | key, 67 | ' ' * (keywidth - len(key)), 68 | val, 69 | ' ' * (valwidth - len(val)), 70 | )) 71 | lines.append(dashes) 72 | self.file.write('\n'.join(lines) + '\n') 73 | 74 | # Flush the output to the file 75 | self.file.flush() 76 | 77 | def _truncate(self, s): 78 | return s[:20] + '...' 
if len(s) > 23 else s 79 | 80 | def writeseq(self, args): 81 | for arg in args: 82 | self.file.write(arg) 83 | self.file.write('\n') 84 | self.file.flush() 85 | 86 | 87 | class JSONOutputFormat(OutputFormat): 88 | def __init__(self, file): 89 | self.file = file 90 | 91 | def writekvs(self, kvs): 92 | for k, v in kvs.items(): 93 | if hasattr(v, 'dtype'): 94 | v = v.tolist() 95 | kvs[k] = float(v) 96 | self.file.write(json.dumps(kvs) + '\n') 97 | self.file.flush() 98 | 99 | 100 | def make_output_format(format, ev_dir): 101 | os.makedirs(ev_dir, exist_ok=True) 102 | if format == 'stdout': 103 | return HumanOutputFormat(sys.stdout) 104 | elif format == 'log': 105 | log_file = open(osp.join(ev_dir, 'log.txt'), 'at') 106 | return HumanOutputFormat(log_file) 107 | elif format == 'json': 108 | json_file = open(osp.join(ev_dir, 'progress.json'), 'at') 109 | return JSONOutputFormat(json_file) 110 | else: 111 | raise ValueError('Unknown format specified: %s' % (format,)) 112 | 113 | 114 | # ================================================================ 115 | # API 116 | # ================================================================ 117 | 118 | 119 | def logkv(key, val): 120 | """ 121 | Log a value of some diagnostic 122 | Call this once for each diagnostic quantity, each iteration 123 | """ 124 | Logger.CURRENT.logkv(key, val) 125 | 126 | 127 | def dumpkvs(): 128 | """ 129 | Write all of the diagnostics from the current iteration 130 | 131 | level: int. (see old_logger.py docs) If the global logger level is higher than 132 | the level argument here, don't print to stdout. 133 | """ 134 | Logger.CURRENT.dumpkvs() 135 | 136 | 137 | # for backwards compatibility 138 | record_tabular = logkv 139 | dump_tabular = dumpkvs 140 | 141 | 142 | def log(*args, level=INFO): 143 | """ 144 | Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). 145 | """ 146 | Logger.CURRENT.log(*args, level=level) 147 | 148 | 149 | def debug(*args): 150 | log(*args, level=DEBUG) 151 | 152 | 153 | def info(*args): 154 | log(*args, level=INFO) 155 | 156 | 157 | def warn(*args): 158 | log(*args, level=WARN) 159 | 160 | 161 | def error(*args): 162 | log(*args, level=ERROR) 163 | 164 | 165 | def set_level(level): 166 | """ 167 | Set logging threshold on current logger. 168 | """ 169 | Logger.CURRENT.set_level(level) 170 | 171 | 172 | def get_level(): 173 | """ 174 | Set logging threshold on current logger. 175 | """ 176 | return Logger.CURRENT.level 177 | 178 | 179 | def get_dir(): 180 | """ 181 | Get directory that log files are being written to. 182 | will be None if there is no output directory (i.e., if you didn't call start) 183 | """ 184 | return Logger.CURRENT.get_dir() 185 | 186 | 187 | def get_expt_dir(): 188 | sys.stderr.write( 189 | "get_expt_dir() is Deprecated. Switch to get_dir() [%s]\n" % (get_dir(),)) 190 | return get_dir() 191 | 192 | 193 | # ================================================================ 194 | # Backend 195 | # ================================================================ 196 | 197 | 198 | class Logger(object): 199 | # A logger with no output files. 
(See right below class definition) 200 | DEFAULT = None 201 | # So that you can still log to the terminal without setting up any output files 202 | CURRENT = None # Current logger being used by the free functions above 203 | 204 | def __init__(self, dir, output_formats): 205 | self.name2val = OrderedDict() # values this iteration 206 | self.level = INFO 207 | self.dir = dir 208 | self.output_formats = output_formats 209 | 210 | # Logging API, forwarded 211 | # ---------------------------------------- 212 | def logkv(self, key, val): 213 | self.name2val[key] = val 214 | 215 | def dumpkvs(self): 216 | for fmt in self.output_formats: 217 | fmt.writekvs(self.name2val) 218 | self.name2val.clear() 219 | 220 | def log(self, *args, level=INFO): 221 | now = datetime.datetime.now(dateutil.tz.tzlocal()) 222 | timestamp = now.strftime('[%Y-%m-%d %H:%M:%S.%f %Z] ') 223 | if self.level <= level: 224 | self._do_log((timestamp,) + args) 225 | 226 | # Configuration 227 | # ---------------------------------------- 228 | def set_level(self, level): 229 | self.level = level 230 | 231 | def get_dir(self): 232 | return self.dir 233 | 234 | def close(self): 235 | for fmt in self.output_formats: 236 | fmt.close() 237 | 238 | # Misc 239 | # ---------------------------------------- 240 | def _do_log(self, args): 241 | for fmt in self.output_formats: 242 | fmt.writeseq(args) 243 | 244 | 245 | # ================================================================ 246 | 247 | Logger.DEFAULT = Logger( 248 | output_formats=[HumanOutputFormat(sys.stdout)], dir=None) 249 | Logger.CURRENT = Logger.DEFAULT 250 | 251 | 252 | class session(object): 253 | """ 254 | Context manager that sets up the loggers for an experiment. 255 | """ 256 | 257 | CURRENT = None # Set to a LoggerContext object using enter/exit or context manager 258 | 259 | def __init__(self, dir, format_strs=None): 260 | self.dir = dir 261 | if format_strs is None: 262 | format_strs = LOG_OUTPUT_FORMATS 263 | output_formats = [make_output_format(f, dir) for f in format_strs] 264 | Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) 265 | 266 | def __enter__(self): 267 | os.makedirs(self.evaluation_dir(), exist_ok=True) 268 | output_formats = [make_output_format( 269 | f, self.evaluation_dir()) for f in LOG_OUTPUT_FORMATS] 270 | Logger.CURRENT = Logger(dir=self.dir, output_formats=output_formats) 271 | 272 | def __exit__(self, *args): 273 | Logger.CURRENT.close() 274 | Logger.CURRENT = Logger.DEFAULT 275 | 276 | def evaluation_dir(self): 277 | return self.dir 278 | 279 | 280 | # ================================================================ 281 | 282 | 283 | def _demo(): 284 | info("hi") 285 | debug("shouldn't appear") 286 | set_level(DEBUG) 287 | debug("should appear") 288 | dir = "/tmp/testlogging" 289 | if os.path.exists(dir): 290 | shutil.rmtree(dir) 291 | with session(dir=dir): 292 | record_tabular("a", 3) 293 | record_tabular("b", 2.5) 294 | dump_tabular() 295 | record_tabular("b", -2.5) 296 | record_tabular("a", 5.5) 297 | dump_tabular() 298 | info("^^^ should see a = 5.5") 299 | 300 | record_tabular("b", -2.5) 301 | dump_tabular() 302 | 303 | record_tabular("a", "longasslongasslongasslongasslongasslongassvalue") 304 | dump_tabular() 305 | 306 | 307 | if __name__ == "__main__": 308 | _demo() 309 | -------------------------------------------------------------------------------- /labs/lab3/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is 
installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /labs/lab3/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import roboschool 6 | import gym 7 | import chainer 8 | env = gym.make('CartPole-v0') 9 | env.reset() 10 | env.step(env.action_space.sample()) 11 | env = gym.make('RoboschoolHalfCheetah-v1') 12 | env.reset() 13 | env.step(env.action_space.sample()) 14 | print("Your environment has been successfully set up!") 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /labs/lab3/simpledqn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3/simpledqn/__init__.py -------------------------------------------------------------------------------- /labs/lab3/simpledqn/gridworld_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from six import StringIO, b 4 | 5 | from gym import utils 6 | from gym.envs.toy_text import discrete 7 | from gym.envs.registration import register 8 | 9 | LEFT = 0 10 | DOWN = 1 11 | RIGHT = 2 12 | UP = 3 13 | 14 | MAPS = { 15 | "4x4": [ 16 | "SFFF", 17 | "FFFH", 18 | "FFFF", 19 | "HFFG" 20 | ], 21 | "8x8": [ 22 | "SFFFFFFF", 23 | "FFFFFFFF", 24 | "FFFHFFFF", 25 | "FFFFFHFF", 26 | "FFFHFFFF", 27 | "FHHFFFHF", 28 | "FHFFHFHF", 29 | "FFFHFFFG" 30 | ], 31 | "9x9": [ 32 | "HFFFFFFFH", 33 | "FFFFFFFFF", 34 | "FFFFFFFFF", 35 | "FFFFFFFFF", 36 | "FFFFSFFFF", 37 | "FFFFFFFFF", 38 | "FFFFFFFFF", 39 | "FFFFFFFFF", 40 | "HFFFFFFFH" 41 | ] 42 | } 43 | 44 | 45 | def to_one_hot(x, len): 46 | one_hot = np.zeros(len) 47 | one_hot[x] = 1 48 | return one_hot 49 | 50 | 51 
| class GridWorld(discrete.DiscreteEnv): 52 | """ 53 | Winter is here. You and your friends were tossing around a frisbee at the park 54 | when you made a wild throw that left the frisbee out in the middle of the lake. 55 | The water is mostly frozen, but there are a few holes where the ice has melted. 56 | If you step into one of those holes, you'll fall into the freezing water. 57 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 58 | you navigate across the lake and retrieve the disc. 59 | However, the ice is slippery, so you won't always move in the direction you intend. 60 | The surface is described using a grid like the following 61 | 62 | SFFF 63 | FHFH 64 | FFFH 65 | HFFG 66 | 67 | S : starting point, safe 68 | F : frozen surface, safe 69 | H : hole, fall to your doom 70 | G : goal, where the frisbee is located 71 | 72 | The episode ends when you reach the goal or fall in a hole. 73 | You receive a reward of 1 if you reach the goal, and zero otherwise. 74 | 75 | """ 76 | 77 | metadata = {'render.modes': ['human', 'ansi']} 78 | 79 | def __init__(self, desc=None, map_name="4x4", is_slippery=False): 80 | if desc is None and map_name is None: 81 | raise ValueError('Must provide either desc or map_name') 82 | elif desc is None: 83 | desc = MAPS[map_name] 84 | self.desc = desc = np.asarray(desc, dtype='c') 85 | self.nrow, self.ncol = nrow, ncol = desc.shape 86 | 87 | nA = 4 88 | nS = nrow * ncol 89 | 90 | isd = np.array(desc == b'S').astype('float64').ravel() 91 | isd /= isd.sum() 92 | 93 | P = {s: {a: [] for a in range(nA)} for s in range(nS)} 94 | 95 | def to_s(row, col): 96 | return row * ncol + col 97 | 98 | def inc(row, col, a): 99 | if a == 0: # left 100 | col = max(col - 1, 0) 101 | elif a == 1: # down 102 | row = min(row + 1, nrow - 1) 103 | elif a == 2: # right 104 | col = min(col + 1, ncol - 1) 105 | elif a == 3: # up 106 | row = max(row - 1, 0) 107 | return (row, col) 108 | 109 | for row in range(nrow): 110 | for col in range(ncol): 111 | s = to_s(row, col) 112 | for a in range(4): 113 | li = P[s][a] 114 | letter = desc[row, col] 115 | if letter in b'GH': 116 | li.append((1.0, s, 0, True)) 117 | else: 118 | if is_slippery: 119 | for b in [(a - 1) % 4, a, (a + 1) % 4]: 120 | newrow, newcol = inc(row, col, b) 121 | newstate = to_s(newrow, newcol) 122 | newletter = desc[newrow, newcol] 123 | done = bytes(newletter) in b'GH' 124 | if newletter == b'G': 125 | rew = 1.0 126 | elif newletter == b'H': 127 | rew = .0 128 | else: 129 | rew = 0. 130 | # rew = float(newletter == b'G') 131 | li.append((1.0 / 3.0, newstate, rew, done)) 132 | else: 133 | newrow, newcol = inc(row, col, a) 134 | newstate = to_s(newrow, newcol) 135 | newletter = desc[newrow, newcol] 136 | done = bytes(newletter) in b'GH' 137 | # rew = float(newletter == b'G') 138 | if newletter == b'G': 139 | rew = 1.0 140 | elif newletter == b'H': 141 | rew = 0. 142 | else: 143 | rew = 0. 
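                            # Clarifying note: in both the slippery and the
                            # deterministic branch, rew is 1.0 only when the
                            # successor cell is the goal 'G'; holes 'H' and
                            # frozen cells 'F' both yield 0, so an episode's
                            # return is 1 exactly when it ends at the goal.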
144 | li.append((1.0, newstate, rew, done)) 145 | 146 | super(GridWorld, self).__init__(nS, nA, P, isd) 147 | 148 | def _reset(self): 149 | s = super(GridWorld, self)._reset() 150 | return to_one_hot(s, self.nS) 151 | 152 | def _step(self, a): 153 | s, r, d, p = super(GridWorld, self)._step(a) 154 | return to_one_hot(s, self.nS), r, d, p 155 | 156 | def print_obs(self, obs): 157 | import copy 158 | map = copy.deepcopy(self.desc).astype(str) 159 | _obs = int(np.where(obs == 1)[0][0]) 160 | map[_obs // 9, _obs % 9] = 'X' 161 | for row in map: 162 | print(row) 163 | 164 | def _render(self, mode='human', close=False): 165 | if close: 166 | return 167 | outfile = StringIO() if mode == 'ansi' else sys.stdout 168 | 169 | row, col = self.s // self.ncol, self.s % self.ncol 170 | desc = self.desc.tolist() 171 | desc = [[c.decode('utf-8') for c in line] for line in desc] 172 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 173 | if self.lastaction is not None: 174 | outfile.write(" ({})\n".format( 175 | ["Left", "Down", "Right", "Up"][self.lastaction])) 176 | else: 177 | outfile.write("\n") 178 | outfile.write("\n".join(''.join(line) for line in desc) + "\n") 179 | 180 | if mode != 'human': 181 | return outfile 182 | 183 | 184 | register( 185 | 'GridWorld-v0', 186 | entry_point='simpledqn.gridworld_env:GridWorld', 187 | timestep_limit=40, 188 | ) 189 | -------------------------------------------------------------------------------- /labs/lab3/simpledqn/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pickle 4 | 5 | 6 | class ReplayBuffer(object): 7 | def __init__(self, max_size): 8 | """Simple replay buffer for storing sampled DQN (s, a, s', r) transitions as tuples. 9 | 10 | :param size: Maximum size of the replay buffer. 11 | """ 12 | self._buffer = [] 13 | self._max_size = max_size 14 | self._idx = 0 15 | 16 | def __len__(self): 17 | return len(self._buffer) 18 | 19 | def add(self, obs_t, act, rew, obs_tp1, done): 20 | """ 21 | Add a new sample to the replay buffer. 22 | :param obs_t: observation at time t 23 | :param act: action 24 | :param rew: reward 25 | :param obs_tp1: observation at time t+1 26 | :param done: termination signal (whether episode has finished or not) 27 | """ 28 | data = (obs_t, act, rew, obs_tp1, done) 29 | if self._idx >= len(self._buffer): 30 | self._buffer.append(data) 31 | else: 32 | self._buffer[self._idx] = data 33 | self._idx = (self._idx + 1) % self._max_size 34 | 35 | def _encode_sample(self, idxes): 36 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], [] 37 | for i in idxes: 38 | data = self._buffer[i] 39 | obs_t, action, reward, obs_tp1, done = data 40 | obses_t.append(np.array(obs_t, copy=False)) 41 | actions.append(np.array(action, copy=False)) 42 | rewards.append(reward) 43 | obses_tp1.append(np.array(obs_tp1, copy=False)) 44 | dones.append(done) 45 | return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones) 46 | 47 | def sample(self, batch_size): 48 | """Sample a batch of transition tuples. 49 | 50 | :param batch_size: Number of sampled transition tuples. 51 | :return: Tuple of transitions. 52 | """ 53 | idxes = [random.randint(0, len(self._buffer) - 1) 54 | for _ in range(batch_size)] 55 | return self._encode_sample(idxes) 56 | 57 | def dump(self, file_path=None): 58 | """Dump the replay buffer into a file. 
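        A minimal usage sketch (hypothetical sizes and paths):

            buf = ReplayBuffer(max_size=10000)
            buf.add(obs_t, act, rew, obs_tp1, done)
            buf.dump('/tmp/replay.pkl')
            buf.load('/tmp/replay.pkl')  # restores the stored transitions

        :param file_path: Path of the pickle file to write.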
59 | """ 60 | file = open(file_path, 'wb') 61 | pickle.dump(self._buffer, file, -1) 62 | file.close() 63 | 64 | def load(self, file_path=None): 65 | """Load the replay buffer from a file 66 | """ 67 | file = open(file_path, 'rb') 68 | self._buffer = pickle.load(file) 69 | file.close() 70 | -------------------------------------------------------------------------------- /labs/lab3/simpledqn/replay_buffer_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3/simpledqn/replay_buffer_warm_start.pkl -------------------------------------------------------------------------------- /labs/lab3/simpledqn/simple_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.special 3 | import chainer 4 | 5 | 6 | # Compute gradient approximately using finite difference 7 | def numerical_grad(f, x, eps=1e-8): 8 | grad = np.zeros_like(x) 9 | for i in range(len(x)): 10 | xplus = np.array(x) 11 | xplus[i] += eps 12 | fplus = f(xplus) 13 | xminus = np.array(x) 14 | xminus[i] -= eps 15 | fminus = f(xminus) 16 | grad[i] = (fplus - fminus) / (2 * eps) 17 | return grad 18 | 19 | 20 | def gradient_check(f, g, x): 21 | # Test the implementation of g(x) = df/dx 22 | # Perform numerical differentiation and test it 23 | g_num = numerical_grad(f, x) 24 | g_test = g(x) 25 | try: 26 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 27 | print("Gradient check passed!") 28 | except AssertionError as e: 29 | print(e) 30 | print("Warning: Gradient check didn't pass!") 31 | 32 | 33 | def log_softmax(logits): 34 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 35 | 36 | 37 | def softmax(logits): 38 | x = logits 39 | x = x - np.max(x, axis=-1, keepdims=True) 40 | x = np.exp(x) 41 | return x / np.sum(x, axis=-1, keepdims=True) 42 | 43 | 44 | def weighted_sample(logits, rng=np.random): 45 | weights = softmax(logits) 46 | return min( 47 | int(np.sum(rng.uniform() > np.cumsum(weights))), 48 | len(weights) - 1 49 | ) 50 | 51 | 52 | def include_bias(x): 53 | # Add a constant term (1.0) to each entry in x 54 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 55 | 56 | 57 | _tested = set() 58 | 59 | nprs = np.random.RandomState 60 | 61 | 62 | def assert_allclose(a, b): 63 | if isinstance(a, (np.ndarray, float, int)): 64 | np.testing.assert_allclose(a, b) 65 | elif isinstance(a, (tuple, list)): 66 | assert isinstance(b, (tuple, list)) 67 | assert len(a) == len(b) 68 | for a_i, b_i in zip(a, b): 69 | assert_allclose(a_i, b_i) 70 | elif isinstance(a, chainer.Variable): 71 | assert isinstance(b, chainer.Variable) 72 | assert_allclose(a.data, b.data) 73 | else: 74 | raise NotImplementedError 75 | 76 | 77 | def test_once(fn, kwargs, desired_output=None): 78 | if fn.__name__ in _tested: 79 | return 80 | _tested.add(fn.__name__) 81 | 82 | if callable(kwargs): 83 | kwargs = kwargs() 84 | 85 | if callable(desired_output): 86 | desired_output = desired_output() 87 | 88 | if desired_output is None: 89 | print("Desired output for %s:" % (fn.__name__), repr(fn(**kwargs))) 90 | exit() 91 | else: 92 | try: 93 | output = fn(**kwargs) 94 | assert_allclose(desired_output, output) 95 | print("Test for %s passed!" % (fn.__name__)) 96 | except AssertionError as e: 97 | print(e) 98 | print("Warning: test for %s didn't pass!" 
% (fn.__name__)) 99 | -------------------------------------------------------------------------------- /labs/lab3/simpledqn/weights_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3/simpledqn/weights_warm_start.pkl -------------------------------------------------------------------------------- /labs/lab3/simpledqn/wrappers.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | 5 | class NoopResetEnv(gym.Wrapper): 6 | def __init__(self, env=None, noop_max=30): 7 | """Sample initial states by taking random number of no-ops on reset. 8 | No-op is assumed to be action 0. 9 | """ 10 | super(NoopResetEnv, self).__init__(env) 11 | self.noop_max = noop_max 12 | self.override_num_noops = None 13 | assert env.unwrapped.get_action_meanings()[0] == 'NOOP' 14 | 15 | def _reset(self): 16 | """ Do no-op action for a number of steps in [1, noop_max].""" 17 | self.env.reset() 18 | if self.override_num_noops is not None: 19 | noops = self.override_num_noops 20 | else: 21 | noops = np.random.randint(1, self.noop_max + 1) 22 | assert noops > 0 23 | obs = None 24 | for _ in range(noops): 25 | obs, _, done, _ = self.env.step(0) 26 | if done: 27 | obs = self.env.reset() 28 | return obs 29 | 30 | 31 | class EpisodicLifeEnv(gym.Wrapper): 32 | def __init__(self, env=None): 33 | """Make end-of-life == end-of-episode, but only reset on true game over. 34 | Done by DeepMind for the DQN and co. since it helps value estimation. 35 | """ 36 | super(EpisodicLifeEnv, self).__init__(env) 37 | self.lives = 0 38 | self.was_real_done = True 39 | self.was_real_reset = False 40 | 41 | def _step(self, action): 42 | obs, reward, done, info = self.env.step(action) 43 | self.was_real_done = done 44 | # check current lives, make loss of life terminal, 45 | # then update lives to handle bonus lives 46 | lives = self.env.unwrapped.ale.lives() 47 | if lives < self.lives and lives > 0: 48 | # for Qbert somtimes we stay in lives == 0 condtion for a few frames 49 | # so its important to keep lives > 0, so that we only reset once 50 | # the environment advertises done. 51 | done = True 52 | self.lives = lives 53 | return obs, reward, done, info 54 | 55 | def _reset(self): 56 | """Reset only when lives are exhausted. 57 | This way all states are still reachable even though lives are episodic, 58 | and the learner need not know about any of this behind-the-scenes. 
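        Concretely (per the implementation below): if the previous done signal
        came from a lost life rather than a true game over, reset is replaced
        by a single no-op step, so play resumes from the current state.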
59 | """ 60 | if self.was_real_done: 61 | obs = self.env.reset() 62 | self.was_real_reset = True 63 | else: 64 | # no-op step to advance from terminal/lost life state 65 | obs, _, _, _ = self.env.step(0) 66 | self.was_real_reset = False 67 | self.lives = self.env.unwrapped.ale.lives() 68 | return obs 69 | -------------------------------------------------------------------------------- /labs/lab3/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3/viskit/__init__.py -------------------------------------------------------------------------------- /labs/lab3/viskit/core.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import json 4 | import os 5 | 6 | import numpy as np 7 | 8 | 9 | # from sandbox.rocky.utils.py_utils import AttrDict 10 | 11 | class AttrDict(dict): 12 | def __init__(self, *args, **kwargs): 13 | super(AttrDict, self).__init__(*args, **kwargs) 14 | self.__dict__ = self 15 | 16 | 17 | def unique(l): 18 | return list(set(l)) 19 | 20 | 21 | def flatten(l): 22 | return [item for sublist in l for item in sublist] 23 | 24 | 25 | def load_progress(progress_json_path, verbose=True): 26 | if verbose: 27 | print("Reading %s" % progress_json_path) 28 | entries = dict() 29 | rows = [] 30 | with open(progress_json_path, 'r') as f: 31 | lines = f.read().split('\n') 32 | for line in lines: 33 | if len(line) > 0: 34 | row = json.loads(line) 35 | rows.append(row) 36 | all_keys = set(k for row in rows for k in row.keys()) 37 | for k in all_keys: 38 | if k not in entries: 39 | entries[k] = [] 40 | for row in rows: 41 | if k in row: 42 | v = row[k] 43 | try: 44 | entries[k].append(float(v)) 45 | except: 46 | entries[k].append(np.nan) 47 | else: 48 | entries[k].append(np.nan) 49 | 50 | # entries[key] = [row.get(key, np.nan) for row in rows] 51 | # added_keys = set() 52 | # for k, v in row.items(): 53 | # if k not in entries: 54 | # entries[k] = [] 55 | # try: 56 | # entries[k].append(float(v)) 57 | # except: 58 | # entries[k].append(0.) 59 | # added_keys.add(k) 60 | # for k in entries.keys(): 61 | # if k not in added_keys: 62 | # entries[k].append(np.nan) 63 | entries = dict([(k, np.array(v)) for k, v in entries.items()]) 64 | return entries 65 | 66 | 67 | def flatten_dict(d): 68 | flat_params = dict() 69 | for k, v in d.items(): 70 | if isinstance(v, dict): 71 | v = flatten_dict(v) 72 | for subk, subv in flatten_dict(v).items(): 73 | flat_params[k + "." 
+ subk] = subv 74 | else: 75 | flat_params[k] = v 76 | return flat_params 77 | 78 | 79 | def load_params(params_json_path): 80 | with open(params_json_path, 'r') as f: 81 | data = json.loads(f.read()) 82 | if "args_data" in data: 83 | del data["args_data"] 84 | if "exp_name" not in data: 85 | data["exp_name"] = params_json_path.split("/")[-2] 86 | return data 87 | 88 | 89 | def lookup(d, keys): 90 | if not isinstance(keys, list): 91 | keys = keys.split(".") 92 | for k in keys: 93 | if hasattr(d, "__getitem__"): 94 | if k in d: 95 | d = d[k] 96 | else: 97 | return None 98 | else: 99 | return None 100 | return d 101 | 102 | 103 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True): 104 | if isinstance(exp_folder_paths, str): 105 | exp_folder_paths = [exp_folder_paths] 106 | exps = [] 107 | for exp_folder_path in exp_folder_paths: 108 | exps += [x[0] for x in os.walk(exp_folder_path)] 109 | if verbose: 110 | print("finished walking exp folders") 111 | exps_data = [] 112 | for exp in exps: 113 | try: 114 | exp_path = exp 115 | variant_json_path = os.path.join(exp_path, "variant.json") 116 | progress_json_path = os.path.join(exp_path, "progress.json") 117 | progress = load_progress(progress_json_path, verbose=verbose) 118 | try: 119 | params = load_params(variant_json_path) 120 | except IOError: 121 | params = dict(exp_name="experiment") 122 | exps_data.append(AttrDict( 123 | progress=progress, params=params, flat_params=flatten_dict(params))) 124 | except IOError as e: 125 | if verbose: 126 | print(e) 127 | 128 | # a dictionary of all keys and types of values 129 | all_keys = dict() 130 | for data in exps_data: 131 | for key in data.flat_params.keys(): 132 | if key not in all_keys: 133 | all_keys[key] = type(data.flat_params[key]) 134 | 135 | # if any data does not have some key, specify the value of it 136 | if not ignore_missing_keys: 137 | default_values = dict() 138 | for data in exps_data: 139 | for key in sorted(all_keys.keys()): 140 | if key not in data.flat_params: 141 | if key not in default_values: 142 | default = None 143 | default_values[key] = default 144 | data.flat_params[key] = default_values[key] 145 | 146 | return exps_data 147 | 148 | 149 | def smart_repr(x): 150 | if isinstance(x, tuple): 151 | if len(x) == 0: 152 | return "tuple()" 153 | elif len(x) == 1: 154 | return "(%s,)" % smart_repr(x[0]) 155 | else: 156 | return "(" + ",".join(map(smart_repr, x)) + ")" 157 | else: 158 | if hasattr(x, "__call__"): 159 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." 
+ x.__name__) 160 | else: 161 | return repr(x) 162 | 163 | 164 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 165 | try: 166 | stringified_pairs = sorted( 167 | map( 168 | eval, 169 | unique( 170 | flatten( 171 | [ 172 | list( 173 | map( 174 | smart_repr, 175 | list(d.flat_params.items()) 176 | ) 177 | ) 178 | for d in exps_data 179 | ] 180 | ) 181 | ) 182 | ), 183 | key=lambda x: ( 184 | tuple("" if it is None else str(it) for it in x), 185 | ) 186 | ) 187 | except Exception as e: 188 | print(e) 189 | import ipdb 190 | ipdb.set_trace() 191 | proposals = [(k, [x[1] for x in v]) 192 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 193 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 194 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 195 | return filtered 196 | 197 | 198 | class Selector(object): 199 | def __init__(self, exps_data, filters=None, custom_filters=None): 200 | self._exps_data = exps_data 201 | if filters is None: 202 | self._filters = tuple() 203 | else: 204 | self._filters = tuple(filters) 205 | if custom_filters is None: 206 | self._custom_filters = [] 207 | else: 208 | self._custom_filters = custom_filters 209 | 210 | def where(self, k, v): 211 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 212 | 213 | def custom_filter(self, filter): 214 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 215 | 216 | def _check_exp(self, exp): 217 | # or exp.flat_params.get(k, None) is None 218 | return all( 219 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 220 | k not in exp.flat_params)) for k, v in self._filters) 221 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 222 | 223 | def extract(self): 224 | return list(filter(self._check_exp, self._exps_data)) 225 | 226 | def iextract(self): 227 | return filter(self._check_exp, self._exps_data) 228 | 229 | 230 | # Taken from plot.ly 231 | color_defaults = [ 232 | '#1f77b4', # muted blue 233 | '#ff7f0e', # safety orange 234 | '#2ca02c', # cooked asparagus green 235 | '#d62728', # brick red 236 | '#9467bd', # muted purple 237 | '#8c564b', # chestnut brown 238 | '#e377c2', # raspberry yogurt pink 239 | '#7f7f7f', # middle gray 240 | '#bcbd22', # curry yellow-green 241 | '#17becf' # blue-teal 242 | ] 243 | 244 | 245 | def hex_to_rgb(hex, opacity=1.0): 246 | if hex[0] == '#': 247 | hex = hex[1:] 248 | assert (len(hex) == 6) 249 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 250 | -------------------------------------------------------------------------------- /labs/lab3/viskit/static/css/dropdowns-enhancement.css: -------------------------------------------------------------------------------- 1 | .dropdown-menu > li > label { 2 | display: block; 3 | padding: 3px 20px; 4 | clear: both; 5 | font-weight: normal; 6 | line-height: 1.42857143; 7 | color: #333333; 8 | white-space: nowrap; 9 | } 10 | .dropdown-menu > li > label:hover, 11 | .dropdown-menu > li > label:focus { 12 | text-decoration: none; 13 | color: #262626; 14 | background-color: #f5f5f5; 15 | } 16 | .dropdown-menu > li > input:checked ~ label, 17 | .dropdown-menu > li > input:checked ~ label:hover, 18 | .dropdown-menu > li > input:checked ~ label:focus, 19 | .dropdown-menu > .active > label, 20 | .dropdown-menu > .active > label:hover, 21 | .dropdown-menu > .active > label:focus { 22 | color: #ffffff; 23 | text-decoration: 
none; 24 | outline: 0; 25 | background-color: #428bca; 26 | } 27 | .dropdown-menu > li > input[disabled] ~ label, 28 | .dropdown-menu > li > input[disabled] ~ label:hover, 29 | .dropdown-menu > li > input[disabled] ~ label:focus, 30 | .dropdown-menu > .disabled > label, 31 | .dropdown-menu > .disabled > label:hover, 32 | .dropdown-menu > .disabled > label:focus { 33 | color: #999999; 34 | } 35 | .dropdown-menu > li > input[disabled] ~ label:hover, 36 | .dropdown-menu > li > input[disabled] ~ label:focus, 37 | .dropdown-menu > .disabled > label:hover, 38 | .dropdown-menu > .disabled > label:focus { 39 | text-decoration: none; 40 | background-color: transparent; 41 | background-image: none; 42 | filter: progid:DXImageTransform.Microsoft.gradient(enabled = false); 43 | cursor: not-allowed; 44 | } 45 | .dropdown-menu > li > label { 46 | margin-bottom: 0; 47 | cursor: pointer; 48 | } 49 | .dropdown-menu > li > input[type="radio"], 50 | .dropdown-menu > li > input[type="checkbox"] { 51 | display: none; 52 | position: absolute; 53 | top: -9999em; 54 | left: -9999em; 55 | } 56 | .dropdown-menu > li > label:focus, 57 | .dropdown-menu > li > input:focus ~ label { 58 | outline: thin dotted; 59 | outline: 5px auto -webkit-focus-ring-color; 60 | outline-offset: -2px; 61 | } 62 | .dropdown-menu.pull-right { 63 | right: 0; 64 | left: auto; 65 | } 66 | .dropdown-menu.pull-top { 67 | bottom: 100%; 68 | top: auto; 69 | margin: 0 0 2px; 70 | -webkit-box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 71 | box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 72 | } 73 | .dropdown-menu.pull-center { 74 | right: 50%; 75 | left: auto; 76 | } 77 | .dropdown-menu.pull-middle { 78 | right: 100%; 79 | margin: 0 2px 0 0; 80 | box-shadow: -5px 0 10px rgba(0, 0, 0, 0.2); 81 | left: auto; 82 | } 83 | .dropdown-menu.pull-middle.pull-right { 84 | right: auto; 85 | left: 100%; 86 | margin: 0 0 0 2px; 87 | box-shadow: 5px 0 10px rgba(0, 0, 0, 0.2); 88 | } 89 | .dropdown-menu.pull-middle.pull-center { 90 | right: 50%; 91 | margin: 0; 92 | box-shadow: 0 0 10px rgba(0, 0, 0, 0.2); 93 | } 94 | .dropdown-menu.bullet { 95 | margin-top: 8px; 96 | } 97 | .dropdown-menu.bullet:before { 98 | width: 0; 99 | height: 0; 100 | content: ''; 101 | display: inline-block; 102 | position: absolute; 103 | border-color: transparent; 104 | border-style: solid; 105 | -webkit-transform: rotate(360deg); 106 | border-width: 0 7px 7px; 107 | border-bottom-color: #cccccc; 108 | border-bottom-color: rgba(0, 0, 0, 0.15); 109 | top: -7px; 110 | left: 9px; 111 | } 112 | .dropdown-menu.bullet:after { 113 | width: 0; 114 | height: 0; 115 | content: ''; 116 | display: inline-block; 117 | position: absolute; 118 | border-color: transparent; 119 | border-style: solid; 120 | -webkit-transform: rotate(360deg); 121 | border-width: 0 6px 6px; 122 | border-bottom-color: #ffffff; 123 | top: -6px; 124 | left: 10px; 125 | } 126 | .dropdown-menu.bullet.pull-right:before { 127 | left: auto; 128 | right: 9px; 129 | } 130 | .dropdown-menu.bullet.pull-right:after { 131 | left: auto; 132 | right: 10px; 133 | } 134 | .dropdown-menu.bullet.pull-top { 135 | margin-top: 0; 136 | margin-bottom: 8px; 137 | } 138 | .dropdown-menu.bullet.pull-top:before { 139 | top: auto; 140 | bottom: -7px; 141 | border-bottom-width: 0; 142 | border-top-width: 7px; 143 | border-top-color: #cccccc; 144 | border-top-color: rgba(0, 0, 0, 0.15); 145 | } 146 | .dropdown-menu.bullet.pull-top:after { 147 | top: auto; 148 | bottom: -6px; 149 | border-bottom: none; 150 | border-top-width: 6px; 151 | border-top-color: 
#ffffff; 152 | } 153 | .dropdown-menu.bullet.pull-center:before { 154 | left: auto; 155 | right: 50%; 156 | margin-right: -7px; 157 | } 158 | .dropdown-menu.bullet.pull-center:after { 159 | left: auto; 160 | right: 50%; 161 | margin-right: -6px; 162 | } 163 | .dropdown-menu.bullet.pull-middle { 164 | margin-right: 8px; 165 | } 166 | .dropdown-menu.bullet.pull-middle:before { 167 | top: 50%; 168 | left: 100%; 169 | right: auto; 170 | margin-top: -7px; 171 | border-right-width: 0; 172 | border-bottom-color: transparent; 173 | border-top-width: 7px; 174 | border-left-color: #cccccc; 175 | border-left-color: rgba(0, 0, 0, 0.15); 176 | } 177 | .dropdown-menu.bullet.pull-middle:after { 178 | top: 50%; 179 | left: 100%; 180 | right: auto; 181 | margin-top: -6px; 182 | border-right-width: 0; 183 | border-bottom-color: transparent; 184 | border-top-width: 6px; 185 | border-left-color: #ffffff; 186 | } 187 | .dropdown-menu.bullet.pull-middle.pull-right { 188 | margin-right: 0; 189 | margin-left: 8px; 190 | } 191 | .dropdown-menu.bullet.pull-middle.pull-right:before { 192 | left: -7px; 193 | border-left-width: 0; 194 | border-right-width: 7px; 195 | border-right-color: #cccccc; 196 | border-right-color: rgba(0, 0, 0, 0.15); 197 | } 198 | .dropdown-menu.bullet.pull-middle.pull-right:after { 199 | left: -6px; 200 | border-left-width: 0; 201 | border-right-width: 6px; 202 | border-right-color: #ffffff; 203 | } 204 | .dropdown-menu.bullet.pull-middle.pull-center { 205 | margin-left: 0; 206 | margin-right: 0; 207 | } 208 | .dropdown-menu.bullet.pull-middle.pull-center:before { 209 | border: none; 210 | display: none; 211 | } 212 | .dropdown-menu.bullet.pull-middle.pull-center:after { 213 | border: none; 214 | display: none; 215 | } 216 | .dropdown-submenu { 217 | position: relative; 218 | } 219 | .dropdown-submenu > .dropdown-menu { 220 | top: 0; 221 | left: 100%; 222 | margin-top: -6px; 223 | margin-left: -1px; 224 | border-top-left-radius: 0; 225 | } 226 | .dropdown-submenu > a:before { 227 | display: block; 228 | float: right; 229 | width: 0; 230 | height: 0; 231 | content: ""; 232 | margin-top: 6px; 233 | margin-right: -8px; 234 | border-width: 4px 0 4px 4px; 235 | border-style: solid; 236 | border-left-style: dashed; 237 | border-top-color: transparent; 238 | border-bottom-color: transparent; 239 | } 240 | @media (max-width: 767px) { 241 | .navbar-nav .dropdown-submenu > a:before { 242 | margin-top: 8px; 243 | border-color: inherit; 244 | border-style: solid; 245 | border-width: 4px 4px 0; 246 | border-left-color: transparent; 247 | border-right-color: transparent; 248 | } 249 | .navbar-nav .dropdown-submenu > a { 250 | padding-left: 40px; 251 | } 252 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > a, 253 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > label { 254 | padding-left: 35px; 255 | } 256 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > a, 257 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > label { 258 | padding-left: 45px; 259 | } 260 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 261 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 262 | padding-left: 55px; 263 | } 264 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu 
> li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 265 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 266 | padding-left: 65px; 267 | } 268 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 269 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 270 | padding-left: 75px; 271 | } 272 | } 273 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 274 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 275 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 276 | background-color: #e7e7e7; 277 | color: #555555; 278 | } 279 | @media (max-width: 767px) { 280 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 281 | border-top-color: #555555; 282 | } 283 | } 284 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 285 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 286 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 287 | background-color: #080808; 288 | color: #ffffff; 289 | } 290 | @media (max-width: 767px) { 291 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 292 | border-top-color: #ffffff; 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /labs/lab4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4.pdf -------------------------------------------------------------------------------- /labs/lab4/alg_utils.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | 3 | 4 | # ============================== 5 | # Shared utilities 6 | # ============================== 7 | 8 | def compute_cumulative_returns(rewards, baselines, discount): 9 | # This method builds up the cumulative sum of discounted rewards for each time step: 10 | # R[t] = sum_{t'>=t} γ^(t'-t)*r_t' 11 | # Note that we use γ^(t'-t) instead of γ^t'. 
This gives us a biased gradient but lower variance 12 | returns = [] 13 | # Use the last baseline prediction to back up 14 | cum_return = baselines[-1] 15 | for reward in rewards[::-1]: 16 | cum_return = cum_return * discount + reward 17 | returns.append(cum_return) 18 | return returns[::-1] 19 | 20 | 21 | def compute_advantages(rewards, baselines, discount, gae_lambda): 22 | # Given rewards r_t and baselines b(s_t), compute the (generalized) advantage estimate A_t 23 | deltas = rewards + discount * baselines[1:] - baselines[:-1] 24 | advs = [] 25 | cum_adv = 0 26 | multiplier = discount * gae_lambda 27 | for delta in deltas[::-1]: 28 | cum_adv = cum_adv * multiplier + delta 29 | advs.append(cum_adv) 30 | return advs[::-1] 31 | 32 | 33 | def compute_pg_vars(trajs, policy, baseline, discount, gae_lambda): 34 | """ 35 | Compute chainer variables needed for various policy gradient algorithms 36 | """ 37 | for traj in trajs: 38 | # Include the last observation here, in case the trajectory is not finished 39 | baselines = baseline.predict(np.concatenate( 40 | [traj["observations"], [traj["last_observation"]]])) 41 | if traj['finished']: 42 | # If already finished, the future cumulative reward starting from the final state is 0 43 | baselines[-1] = 0. 44 | # This is useful when fitting baselines. It uses the baseline prediction of the last state value to perform a 45 | # Bellman backup if the trajectory is not finished. 46 | traj['returns'] = compute_cumulative_returns( 47 | traj['rewards'], baselines, discount) 48 | traj['advantages'] = compute_advantages( 49 | traj['rewards'], baselines, discount, gae_lambda) 50 | traj['baselines'] = baselines[:-1] 51 | 52 | # First, we compute a flattened list of observations, actions, and advantages 53 | all_obs = np.concatenate([traj['observations'] for traj in trajs], axis=0) 54 | all_acts = np.concatenate([traj['actions'] for traj in trajs], axis=0) 55 | all_advs = np.concatenate([traj['advantages'] for traj in trajs], axis=0) 56 | all_dists = { 57 | k: np.concatenate([traj['distributions'][k] for traj in trajs], axis=0) 58 | for k in trajs[0]['distributions'].keys() 59 | } 60 | 61 | # Normalizing the advantage values can make the algorithm more robust to reward scaling 62 | all_advs = (all_advs - np.mean(all_advs)) / (np.std(all_advs) + 1e-8) 63 | 64 | # Form chainer variables 65 | all_obs = Variable(all_obs) 66 | all_acts = Variable(all_acts) 67 | all_advs = Variable(all_advs.astype(np.float32, copy=False)) 68 | all_dists = policy.distribution.from_dict( 69 | {k: Variable(v) for k, v in all_dists.items()}) 70 | 71 | return all_obs, all_acts, all_advs, all_dists 72 | 73 | 74 | # ============================== 75 | # Helper methods for logging 76 | # ============================== 77 | 78 | def log_reward_statistics(env): 79 | # keep unwrapping until we get the Monitor wrapper 80 | while not isinstance(env, gym.wrappers.Monitor): 81 | if not isinstance(env, gym.Wrapper): 82 | assert False, "No Monitor wrapper found around the environment" 83 | env = env.env 84 | 85 | assert isinstance(env, gym.wrappers.Monitor) 86 | all_stats = None 87 | for _ in range(10): 88 | try: 89 | all_stats = gym.wrappers.monitoring.load_results(env.directory) 90 | except FileNotFoundError: 91 | time.sleep(1) 92 | continue 93 | if all_stats is not None: 94 | episode_rewards = all_stats['episode_rewards'] 95 | episode_lengths = all_stats['episode_lengths'] 96 | 97 | recent_episode_rewards = episode_rewards[-100:] 98 | recent_episode_lengths = episode_lengths[-100:] 99 | 100 | if len(recent_episode_rewards) 
> 0: 101 | logger.logkv('AverageReturn', np.mean(recent_episode_rewards)) 102 | logger.logkv('MinReturn', np.min(recent_episode_rewards)) 103 | logger.logkv('MaxReturn', np.max(recent_episode_rewards)) 104 | logger.logkv('StdReturn', np.std(recent_episode_rewards)) 105 | logger.logkv('AverageEpisodeLength', 106 | np.mean(recent_episode_lengths)) 107 | logger.logkv('MinEpisodeLength', np.min(recent_episode_lengths)) 108 | logger.logkv('MaxEpisodeLength', np.max(recent_episode_lengths)) 109 | logger.logkv('StdEpisodeLength', np.std(recent_episode_lengths)) 110 | 111 | logger.logkv('TotalNEpisodes', len(episode_rewards)) 112 | logger.logkv('TotalNSamples', np.sum(episode_lengths)) 113 | 114 | 115 | def log_baseline_statistics(trajs): 116 | # Specifically, compute the explained variance of the baseline predictions, defined as 1 - Var[returns - baselines] / Var[returns] 117 | baselines = np.concatenate([traj['baselines'] for traj in trajs]) 118 | returns = np.concatenate([traj['returns'] for traj in trajs]) 119 | logger.logkv('ExplainedVariance', 120 | explained_variance_1d(baselines, returns)) 121 | 122 | 123 | def log_action_distribution_statistics(dists): 124 | with chainer.no_backprop_mode(): 125 | entropy = F.mean(dists.entropy()).data 126 | logger.logkv('Entropy', entropy) 127 | logger.logkv('Perplexity', np.exp(entropy)) 128 | if isinstance(dists, Gaussian): 129 | logger.logkv('AveragePolicyStd', F.mean( 130 | F.exp(dists.log_stds)).data) 131 | for idx in range(dists.log_stds.shape[-1]): 132 | logger.logkv('AveragePolicyStd[{}]'.format( 133 | idx), F.mean(F.exp(dists.log_stds[..., idx])).data) 134 | elif isinstance(dists, Categorical): 135 | probs = F.mean(F.softmax(dists.logits), axis=0).data 136 | for idx in range(len(probs)): 137 | logger.logkv('AveragePolicyProb[{}]'.format(idx), probs[idx]) 138 | -------------------------------------------------------------------------------- /labs/lab4/algs.py: -------------------------------------------------------------------------------- 1 | from pg import pg 2 | from trpo import trpo 3 | from a2c import a2c 4 | -------------------------------------------------------------------------------- /labs/lab4/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost is not on the PATH, fall back to the XQuartz binary 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 
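# Neither an xhost on the PATH nor the XQuartz binary exists, so X11 forwarding cannot be set up; give up.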
17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /labs/lab4/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /labs/lab4/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_a2c_breakout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import a2c 3 | from env_makers import EnvMaker 4 | from models import CategoricalCNNPolicy 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/a2c-breakout" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('BreakoutNoFrameskip-v4') 19 | env = env_maker.make() 20 | policy = CategoricalCNNPolicy( 21 | env.observation_space, env.action_space, env.spec) 22 | vf = policy.create_vf() 23 | a2c( 24 | env=env, 25 | env_maker=env_maker, 26 | n_envs=16, 27 | policy=policy, 28 | vf=vf, 29 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 30 | ) 31 | -------------------------------------------------------------------------------- 
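The launcher above is the template for every A2C experiment in this lab: only the environment id and the log directory change. A minimal sketch of adapting it to another Atari game (assuming the same `a2c`, `EnvMaker`, `CategoricalCNNPolicy`, and `SnapshotSaver` interfaces shown above; the env id `SeaquestNoFrameskip-v4` and the log directory are only illustrative choices, not a script that ships with the lab):

    #!/usr/bin/env python
    # Hypothetical variant of run_a2c_breakout.py for a different Atari environment.
    from algs import a2c
    from env_makers import EnvMaker
    from models import CategoricalCNNPolicy
    from utils import SnapshotSaver
    import numpy as np
    import os
    import logger

    log_dir = "data/local/a2c-seaquest"  # illustrative log directory
    np.random.seed(42)
    os.system("rm -rf {}".format(log_dir))  # clean up existing logs, as in the scripts above

    with logger.session(log_dir):
        env_maker = EnvMaker('SeaquestNoFrameskip-v4')  # assumed: any Atari NoFrameskip id registered in gym
        env = env_maker.make()
        policy = CategoricalCNNPolicy(env.observation_space, env.action_space, env.spec)
        vf = policy.create_vf()
        a2c(env=env, env_maker=env_maker, n_envs=16, policy=policy, vf=vf,
            snapshot_saver=SnapshotSaver(log_dir, interval=10))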
/labs/lab4/experiments/run_a2c_pong.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import a2c 3 | from env_makers import EnvMaker 4 | from models import CategoricalCNNPolicy 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/a2c-pong" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('PongNoFrameskip-v4') 19 | env = env_maker.make() 20 | policy = CategoricalCNNPolicy( 21 | env.observation_space, env.action_space, env.spec) 22 | vf = policy.create_vf() 23 | a2c( 24 | env=env, 25 | env_maker=env_maker, 26 | n_envs=16, 27 | policy=policy, 28 | vf=vf, 29 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 30 | ) 31 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_a2c_pong_warm_start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import numpy as np 4 | import os 5 | import logger 6 | import pickle 7 | 8 | log_dir = "data/local/a2c-pong-warm-start" 9 | 10 | np.random.seed(42) 11 | 12 | # Clean up existing logs 13 | os.system("rm -rf {}".format(log_dir)) 14 | 15 | with logger.session(log_dir): 16 | with open("pong_warm_start.pkl", "rb") as f: 17 | state = pickle.load(f) 18 | saver = SnapshotSaver(log_dir, interval=10) 19 | alg_state = state['alg_state'] 20 | env = alg_state['env_maker'].make() 21 | alg = state['alg'] 22 | alg(env=env, snapshot_saver=saver, **alg_state) 23 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_pg_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import chainer 3 | 4 | from algs import pg 5 | from env_makers import EnvMaker 6 | from models import CategoricalMLPPolicy, MLPBaseline 7 | from utils import SnapshotSaver 8 | import numpy as np 9 | import os 10 | import logger 11 | 12 | log_dir = "data/local/pg-cartpole" 13 | 14 | np.random.seed(42) 15 | 16 | # Clean up existing logs 17 | os.system("rm -rf {}".format(log_dir)) 18 | 19 | with logger.session(log_dir): 20 | env_maker = EnvMaker('CartPole-v0') 21 | env = env_maker.make() 22 | policy = CategoricalMLPPolicy(observation_space=env.observation_space, action_space=env.action_space, 23 | env_spec=env.spec) 24 | baseline = MLPBaseline(observation_space=env.observation_space, action_space=env.action_space, 25 | env_spec=env.spec) 26 | pg( 27 | env=env, 28 | env_maker=env_maker, 29 | n_envs=16, 30 | policy=policy, 31 | baseline=baseline, 32 | batch_size=2000, 33 | n_iters=100, 34 | snapshot_saver=SnapshotSaver(log_dir), 35 | optimizer=chainer.optimizers.Adam(1e-2) 36 | ) 37 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import trpo 3 | from env_makers import EnvMaker 4 | from models import CategoricalMLPPolicy, MLPBaseline 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/trpo-cartpole" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 
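# Note: the rm -rf above wipes any previous results under log_dir, so the snapshot saver below always starts from a clean directory.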
16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('CartPole-v0') 19 | env = env_maker.make() 20 | policy = CategoricalMLPPolicy( 21 | observation_space=env.observation_space, 22 | action_space=env.action_space, 23 | env_spec=env.spec 24 | ) 25 | baseline = MLPBaseline( 26 | observation_space=env.observation_space, 27 | action_space=env.action_space, 28 | env_spec=env.spec 29 | ) 30 | trpo( 31 | env=env, 32 | env_maker=env_maker, 33 | n_envs=16, 34 | policy=policy, 35 | baseline=baseline, 36 | batch_size=2000, 37 | n_iters=100, 38 | snapshot_saver=SnapshotSaver(log_dir) 39 | ) 40 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_trpo_half_cheetah.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import chainer 3 | 4 | from algs import trpo 5 | from env_makers import EnvMaker 6 | from models import GaussianMLPPolicy, MLPBaseline 7 | from utils import SnapshotSaver 8 | import numpy as np 9 | import os 10 | import logger 11 | 12 | log_dir = "data/local/trpo-half-cheetah" 13 | 14 | np.random.seed(42) 15 | 16 | # Clean up existing logs 17 | os.system("rm -rf {}".format(log_dir)) 18 | 19 | with logger.session(log_dir): 20 | env_maker = EnvMaker('RoboschoolHalfCheetah-v1') 21 | env = env_maker.make() 22 | policy = GaussianMLPPolicy( 23 | observation_space=env.observation_space, 24 | action_space=env.action_space, 25 | env_spec=env.spec, 26 | hidden_sizes=(256, 64), 27 | hidden_nonlinearity=chainer.functions.tanh, 28 | ) 29 | baseline = MLPBaseline( 30 | observation_space=env.observation_space, 31 | action_space=env.action_space, 32 | env_spec=env.spec, 33 | hidden_sizes=(256, 64), 34 | hidden_nonlinearity=chainer.functions.tanh, 35 | ) 36 | trpo( 37 | env=env, 38 | env_maker=env_maker, 39 | n_envs=16, 40 | policy=policy, 41 | baseline=baseline, 42 | batch_size=5000, 43 | n_iters=5000, 44 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 45 | ) 46 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_trpo_pendulum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import chainer 3 | 4 | from algs import trpo 5 | from env_makers import EnvMaker 6 | from models import GaussianMLPPolicy, MLPBaseline 7 | from utils import SnapshotSaver 8 | import numpy as np 9 | import os 10 | import logger 11 | 12 | log_dir = "data/local/trpo-pendulum" 13 | 14 | np.random.seed(42) 15 | 16 | # Clean up existing logs 17 | os.system("rm -rf {}".format(log_dir)) 18 | 19 | with logger.session(log_dir): 20 | env_maker = EnvMaker('Pendulum-v0') 21 | env = env_maker.make() 22 | policy = GaussianMLPPolicy( 23 | observation_space=env.observation_space, 24 | action_space=env.action_space, 25 | env_spec=env.spec, 26 | hidden_sizes=(64, 64), 27 | hidden_nonlinearity=chainer.functions.tanh, 28 | ) 29 | baseline = MLPBaseline( 30 | observation_space=env.observation_space, 31 | action_space=env.action_space, 32 | env_spec=env.spec, 33 | hidden_sizes=(64, 64), 34 | hidden_nonlinearity=chainer.functions.tanh, 35 | ) 36 | trpo( 37 | env=env, 38 | env_maker=env_maker, 39 | n_envs=16, 40 | policy=policy, 41 | baseline=baseline, 42 | batch_size=10000, 43 | n_iters=100, 44 | snapshot_saver=SnapshotSaver(log_dir), 45 | ) 46 | -------------------------------------------------------------------------------- /labs/lab4/findport.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Usage: findport.py 3000 100 4 | # 5 | from __future__ import print_function 6 | import socket 7 | from contextlib import closing 8 | import sys 9 | 10 | if len(sys.argv) != 3: 11 | print("Usage: {} <base> <increment>".format(sys.argv[0])) 12 | sys.exit(1) 13 | 14 | base = int(sys.argv[1]) 15 | increment = int(sys.argv[2]) 16 | 17 | 18 | def find_free_port(): 19 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 20 | for port in range(base, 65536, increment): 21 | try: 22 | s.bind(('', port)) 23 | return s.getsockname()[1] 24 | except socket.error: 25 | continue 26 | 27 | 28 | print(find_free_port()) 29 | -------------------------------------------------------------------------------- /labs/lab4/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir -p ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /labs/lab4/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | See README.md for a description of the logging API. 4 | 5 | OFF state corresponds to having Logger.CURRENT == Logger.DEFAULT 6 | ON state is otherwise 7 | 8 | """ 9 | import datetime 10 | from collections import OrderedDict 11 | import os 12 | import sys 13 | import shutil 14 | import os.path as osp 15 | import json 16 | 17 | import dateutil.tz 18 | 19 | LOG_OUTPUT_FORMATS = ['stdout', 'log', 'json'] 20 | 21 | DEBUG = 10 22 | INFO = 20 23 | WARN = 30 24 | ERROR = 40 25 | 26 | DISABLED = 50 27 | 28 | 29 | class OutputFormat(object): 30 | def writekvs(self, kvs): 31 | """ 32 | Write key-value pairs 33 | """ 34 | raise NotImplementedError 35 | 36 | def writeseq(self, args): 37 | """ 38 | Write a sequence of other data (e.g. a logging message) 39 | """ 40 | pass 41 | 42 | def close(self): 43 | return 44 | 45 | 46 | class HumanOutputFormat(OutputFormat): 47 | def __init__(self, file): 48 | self.file = file 49 | 50 | def writekvs(self, kvs): 51 | # Create strings for printing 52 | key2str = OrderedDict() 53 | for (key, val) in kvs.items(): 54 | valstr = '%-8.5g' % (val,) if hasattr(val, '__float__') else val 55 | key2str[self._truncate(key)] = self._truncate(valstr) 56 | 57 | # Find max widths 58 | keywidth = max(map(len, key2str.keys())) 59 | valwidth = max(map(len, key2str.values())) 60 | 61 | # Write out the data 62 | dashes = '-' * (keywidth + valwidth + 7) 63 | lines = [dashes] 64 | for (key, val) in key2str.items(): 65 | lines.append('| %s%s | %s%s |' % ( 66 | key, 67 | ' ' * (keywidth - len(key)), 68 | val, 69 | ' ' * (valwidth - len(val)), 70 | )) 71 | lines.append(dashes) 72 | self.file.write('\n'.join(lines) + '\n') 73 | 74 | # Flush the output to the file 75 | self.file.flush() 76 | 77 | def _truncate(self, s): 78 | return s[:20] + '...' 
if len(s) > 23 else s 79 | 80 | def writeseq(self, args): 81 | for arg in args: 82 | self.file.write(arg) 83 | self.file.write('\n') 84 | self.file.flush() 85 | 86 | 87 | class JSONOutputFormat(OutputFormat): 88 | def __init__(self, file): 89 | self.file = file 90 | 91 | def writekvs(self, kvs): 92 | for k, v in kvs.items(): 93 | if hasattr(v, 'dtype'): 94 | v = v.tolist() 95 | kvs[k] = float(v) 96 | self.file.write(json.dumps(kvs) + '\n') 97 | self.file.flush() 98 | 99 | 100 | def make_output_format(format, ev_dir): 101 | os.makedirs(ev_dir, exist_ok=True) 102 | if format == 'stdout': 103 | return HumanOutputFormat(sys.stdout) 104 | elif format == 'log': 105 | log_file = open(osp.join(ev_dir, 'log.txt'), 'at') 106 | return HumanOutputFormat(log_file) 107 | elif format == 'json': 108 | json_file = open(osp.join(ev_dir, 'progress.json'), 'at') 109 | return JSONOutputFormat(json_file) 110 | else: 111 | raise ValueError('Unknown format specified: %s' % (format,)) 112 | 113 | 114 | # ================================================================ 115 | # API 116 | # ================================================================ 117 | 118 | 119 | def logkv(key, val): 120 | """ 121 | Log a value of some diagnostic 122 | Call this once for each diagnostic quantity, each iteration 123 | """ 124 | Logger.CURRENT.logkv(key, val) 125 | 126 | 127 | def dumpkvs(): 128 | """ 129 | Write all of the diagnostics from the current iteration 130 | 131 | 132 | 133 | """ 134 | Logger.CURRENT.dumpkvs() 135 | 136 | 137 | # for backwards compatibility 138 | record_tabular = logkv 139 | dump_tabular = dumpkvs 140 | 141 | 142 | def log(*args, level=INFO): 143 | """ 144 | Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). 145 | """ 146 | Logger.CURRENT.log(*args, level=level) 147 | 148 | 149 | def debug(*args): 150 | log(*args, level=DEBUG) 151 | 152 | 153 | def info(*args): 154 | log(*args, level=INFO) 155 | 156 | 157 | def warn(*args): 158 | log(*args, level=WARN) 159 | 160 | 161 | def error(*args): 162 | log(*args, level=ERROR) 163 | 164 | 165 | def set_level(level): 166 | """ 167 | Set logging threshold on current logger. 168 | """ 169 | Logger.CURRENT.set_level(level) 170 | 171 | 172 | def get_level(): 173 | """ 174 | Get logging threshold on current logger. 175 | """ 176 | return Logger.CURRENT.level 177 | 178 | 179 | def get_dir(): 180 | """ 181 | Get directory that log files are being written to. 182 | Will be None if there is no output directory (i.e., if no logging session has been configured) 183 | """ 184 | return Logger.CURRENT.get_dir() 185 | 186 | 187 | def get_expt_dir(): 188 | sys.stderr.write( 189 | "get_expt_dir() is deprecated. Switch to get_dir() [%s]\n" % (get_dir(),)) 190 | return get_dir() 191 | 192 | 193 | # ================================================================ 194 | # Backend 195 | # ================================================================ 196 | 197 | 198 | class Logger(object): 199 | # A logger with no output files. 
(See right below class definition) 200 | DEFAULT = None 201 | # So that you can still log to the terminal without setting up any output files 202 | CURRENT = None # Current logger being used by the free functions above 203 | 204 | def __init__(self, dir, output_formats): 205 | self.name2val = OrderedDict() # values this iteration 206 | self.level = INFO 207 | self.dir = dir 208 | self.output_formats = output_formats 209 | 210 | # Logging API, forwarded 211 | # ---------------------------------------- 212 | def logkv(self, key, val): 213 | self.name2val[key] = val 214 | 215 | def dumpkvs(self): 216 | for fmt in self.output_formats: 217 | fmt.writekvs(self.name2val) 218 | self.name2val.clear() 219 | 220 | def log(self, *args, level=INFO): 221 | now = datetime.datetime.now(dateutil.tz.tzlocal()) 222 | timestamp = now.strftime('[%Y-%m-%d %H:%M:%S.%f %Z] ') 223 | if self.level <= level: 224 | self._do_log((timestamp,) + args) 225 | 226 | # Configuration 227 | # ---------------------------------------- 228 | def set_level(self, level): 229 | self.level = level 230 | 231 | def get_dir(self): 232 | return self.dir 233 | 234 | def close(self): 235 | for fmt in self.output_formats: 236 | fmt.close() 237 | 238 | # Misc 239 | # ---------------------------------------- 240 | def _do_log(self, args): 241 | for fmt in self.output_formats: 242 | fmt.writeseq(args) 243 | 244 | 245 | # ================================================================ 246 | 247 | Logger.DEFAULT = Logger( 248 | output_formats=[HumanOutputFormat(sys.stdout)], dir=None) 249 | Logger.CURRENT = Logger.DEFAULT 250 | 251 | 252 | class session(object): 253 | """ 254 | Context manager that sets up the loggers for an experiment. 255 | """ 256 | 257 | CURRENT = None # Set to a LoggerContext object using enter/exit or context manager 258 | 259 | def __init__(self, dir, format_strs=None): 260 | self.dir = dir 261 | if format_strs is None: 262 | format_strs = LOG_OUTPUT_FORMATS 263 | output_formats = [make_output_format(f, dir) for f in format_strs] 264 | Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) 265 | 266 | def __enter__(self): 267 | os.makedirs(self.evaluation_dir(), exist_ok=True) 268 | output_formats = [make_output_format( 269 | f, self.evaluation_dir()) for f in LOG_OUTPUT_FORMATS] 270 | Logger.CURRENT = Logger(dir=self.dir, output_formats=output_formats) 271 | 272 | def __exit__(self, *args): 273 | Logger.CURRENT.close() 274 | Logger.CURRENT = Logger.DEFAULT 275 | 276 | def evaluation_dir(self): 277 | return self.dir 278 | 279 | 280 | # ================================================================ 281 | 282 | 283 | def _demo(): 284 | info("hi") 285 | debug("shouldn't appear") 286 | set_level(DEBUG) 287 | debug("should appear") 288 | dir = "/tmp/testlogging" 289 | if os.path.exists(dir): 290 | shutil.rmtree(dir) 291 | with session(dir=dir): 292 | record_tabular("a", 3) 293 | record_tabular("b", 2.5) 294 | dump_tabular() 295 | record_tabular("b", -2.5) 296 | record_tabular("a", 5.5) 297 | dump_tabular() 298 | info("^^^ should see a = 5.5") 299 | 300 | record_tabular("b", -2.5) 301 | dump_tabular() 302 | 303 | record_tabular("a", "longasslongasslongasslongasslongasslongassvalue") 304 | dump_tabular() 305 | 306 | 307 | if __name__ == "__main__": 308 | _demo() 309 | -------------------------------------------------------------------------------- /labs/lab4/pg.py: -------------------------------------------------------------------------------- 1 | from alg_utils import * 2 | from simplepg.simple_utils import 
test_once, nprs 3 | import tests.pg_tests 4 | 5 | 6 | def pg(env, env_maker, policy, baseline, n_envs=mp.cpu_count(), last_iter=-1, n_iters=100, batch_size=1000, 7 | optimizer=chainer.optimizers.Adam(), discount=0.99, gae_lambda=0.97, snapshot_saver=None): 8 | """ 9 | This method implements the policy gradient algorithm. 10 | :param env: An environment instance, which should have the same class as what env_maker.make() returns. 11 | :param env_maker: An object such that calling env_maker.make() will generate a new environment. 12 | :param policy: A stochastic policy which we will be optimizing. 13 | :param baseline: A baseline used for variance reduction and estimating future returns for unfinished trajectories. 14 | :param n_envs: Number of environments running simultaneously. 15 | :param last_iter: The index of the last iteration. This is normally -1 when starting afresh, but may be different when 16 | loaded from a snapshot. 17 | :param n_iters: The total number of iterations to run. 18 | :param batch_size: The number of samples used per iteration. 19 | :param optimizer: A Chainer optimizer instance. By default we use the Adam algorithm with learning rate 1e-3. 20 | :param discount: Discount factor. 21 | :param gae_lambda: Lambda parameter used for generalized advantage estimation. 22 | :param snapshot_saver: An object for saving snapshots. 23 | """ 24 | 25 | if getattr(optimizer, 'target', None) is not policy: 26 | optimizer.setup(policy) 27 | 28 | logger.info("Starting env pool") 29 | with EnvPool(env_maker, n_envs=n_envs) as env_pool: 30 | for iter in range(last_iter + 1, n_iters): 31 | logger.info("Starting iteration {}".format(iter)) 32 | logger.logkv('Iteration', iter) 33 | 34 | logger.info("Start collecting samples") 35 | trajs = parallel_collect_samples(env_pool, policy, batch_size) 36 | 37 | logger.info("Computing input variables for policy optimization") 38 | all_obs, all_acts, all_advs, _ = compute_pg_vars( 39 | trajs, policy, baseline, discount, gae_lambda 40 | ) 41 | 42 | # Begin policy update 43 | 44 | # Now, you need to implement the computation of the policy gradient 45 | # The policy gradient is given by -1/T \sum_t \nabla_\theta(log(p_\theta(a_t|s_t))) * A_t 46 | # Note the negative sign in front, since optimizers most often minimize a loss rather than maximize an objective 47 | # This is the same as \nabla_\theta(-1/T \sum_t log(p_\theta(a_t|s_t)) * A_t) = \nabla_\theta(L), where L is the surrogate loss term 48 | 49 | logger.info("Computing policy gradient") 50 | 51 | # Methods that may be useful: 52 | # - `dists.logli(actions)' returns the log probability of the actions under the distribution `dists'. 53 | # This method returns a chainer variable. 
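# A tiny worked example of the surrogate loss above (an illustrative sketch, not part of the lab API):
# with per-step log-probabilities logp = [-0.5, -1.2] and advantages A = [1.0, -2.0],
#   L = -mean(logp * A) = -mean([-0.5, 2.4]) = -0.95,
# and minimizing L with respect to the policy parameters follows the policy gradient.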
54 | 55 | dists = policy.compute_dists(all_obs) 56 | 57 | def compute_surr_loss(dists, all_acts, all_advs): 58 | """ 59 | :param dists: An instance of subclass of Distribution 60 | :param all_acts: A chainer variable, which should be a matrix of size N * |A| 61 | :param all_advs: A chainer variable, which should be a vector of size N 62 | :return: A chainer variable, which should be a scalar 63 | """ 64 | return -F.mean(dists.logli(all_acts)*all_advs) 65 | 66 | test_once(compute_surr_loss) 67 | 68 | surr_loss = compute_surr_loss(dists, all_acts, all_advs) 69 | 70 | # reset gradients stored in the policy parameters 71 | policy.cleargrads() 72 | surr_loss.backward() 73 | 74 | # apply the computed gradient 75 | optimizer.update() 76 | 77 | # Update baseline 78 | logger.info("Updating baseline") 79 | baseline.update(trajs) 80 | 81 | # log statistics 82 | logger.info("Computing logging information") 83 | logger.logkv('SurrLoss', surr_loss.data) 84 | log_action_distribution_statistics(dists) 85 | log_reward_statistics(env) 86 | log_baseline_statistics(trajs) 87 | logger.dumpkvs() 88 | 89 | if snapshot_saver is not None: 90 | logger.info("Saving snapshot") 91 | snapshot_saver.save_state( 92 | iter, 93 | dict( 94 | alg=pg, 95 | alg_state=dict( 96 | env_maker=env_maker, 97 | policy=policy, 98 | baseline=baseline, 99 | n_envs=n_envs, 100 | last_iter=iter, 101 | n_iters=n_iters, 102 | batch_size=batch_size, 103 | optimizer=optimizer, 104 | discount=discount, 105 | gae_lambda=gae_lambda 106 | ) 107 | ) 108 | ) 109 | -------------------------------------------------------------------------------- /labs/lab4/pong_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/pong_warm_start.pkl -------------------------------------------------------------------------------- /labs/lab4/scripts/resume_training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import click 4 | import logger 5 | 6 | 7 | @click.command() 8 | @click.argument("dir") # , "Directory which contains snapshot files") 9 | @click.option("--interval", help="Interval between saving snapshots", type=int, default=10) 10 | def main(dir, interval): 11 | with logger.session(dir): 12 | saver = SnapshotSaver(dir, interval=interval) 13 | state = saver.get_state() 14 | alg_state = state['alg_state'] 15 | env = alg_state['env_maker'].make() 16 | alg = state['alg'] 17 | alg(env=env, snapshot_saver=saver, **alg_state) 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | -------------------------------------------------------------------------------- /labs/lab4/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. 
You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /labs/lab4/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import click 4 | import time 5 | import os 6 | 7 | 8 | @click.command() 9 | @click.argument("dir") 10 | def main(dir): 11 | env = None 12 | while True: 13 | saver = SnapshotSaver(dir) 14 | state = saver.get_state() 15 | if state is None: 16 | time.sleep(1) 17 | continue 18 | alg_state = state['alg_state'] 19 | if env is None: 20 | env = alg_state['env_maker'].make() 21 | policy = alg_state['policy'] 22 | ob = env.reset() 23 | done = False 24 | while not done: 25 | action, _ = policy.get_action(ob) 26 | ob, _, done, _ = env.step(action) 27 | env.render() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /labs/lab4/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import roboschool 6 | import gym 7 | import chainer 8 | env = gym.make('CartPole-v0') 9 | env.reset() 10 | env.step(env.action_space.sample()) 11 | env = gym.make('RoboschoolHalfCheetah-v1') 12 | env.reset() 13 | env.step(env.action_space.sample()) 14 | print("Your environment has been successfully set up!") 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /labs/lab4/simplepg/__pycache__/point_env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/simplepg/__pycache__/point_env.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/simplepg/__pycache__/simple_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/simplepg/__pycache__/simple_utils.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | from gym import Env 2 | from gym.envs.registration 
import register 3 | from gym.utils import seeding 4 | from gym import spaces 5 | from gym.envs.classic_control.cartpole import CartPoleEnv 6 | import numpy as np 7 | 8 | 9 | class PointEnv(Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 50 13 | } 14 | 15 | def __init__(self): 16 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 17 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 18 | 19 | self._seed() 20 | self.viewer = None 21 | self.state = None 22 | 23 | def _seed(self, seed=None): 24 | self.np_random, seed = seeding.np_random(seed) 25 | return [seed] 26 | 27 | def _step(self, action): 28 | action = np.clip(action, -0.025, 0.025) 29 | self.state = np.clip(self.state + action, -1, 1) 30 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 31 | 32 | def _reset(self): 33 | while True: 34 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 35 | # Sample states that are far away 36 | if np.linalg.norm(self.state) > 0.9: 37 | break 38 | return np.array(self.state) 39 | 40 | # def _render(self, mode='human', close=False): 41 | # pass 42 | 43 | def _render(self, mode='human', close=False): 44 | if close: 45 | if self.viewer is not None: 46 | self.viewer.close() 47 | self.viewer = None 48 | return 49 | 50 | screen_width = 800 51 | screen_height = 800 52 | 53 | if self.viewer is None: 54 | from gym.envs.classic_control import rendering 55 | self.viewer = rendering.Viewer(screen_width, screen_height) 56 | 57 | agent = rendering.make_circle( 58 | min(screen_height, screen_width) * 0.03) 59 | origin = rendering.make_circle( 60 | min(screen_height, screen_width) * 0.03) 61 | trans = rendering.Transform(translation=(0, 0)) 62 | agent.add_attr(trans) 63 | self.trans = trans 64 | agent.set_color(1, 0, 0) 65 | origin.set_color(0, 0, 0) 66 | origin.add_attr(rendering.Transform( 67 | translation=(screen_width // 2, screen_height // 2))) 68 | self.viewer.add_geom(agent) 69 | self.viewer.add_geom(origin) 70 | 71 | # self.trans.set_translation(0, 0) 72 | self.trans.set_translation( 73 | (self.state[0] + 1) / 2 * screen_width, 74 | (self.state[1] + 1) / 2 * screen_height, 75 | ) 76 | 77 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 78 | 79 | 80 | register( 81 | 'Point-v0', 82 | entry_point='simplepg.point_env:PointEnv', 83 | timestep_limit=40, 84 | ) 85 | -------------------------------------------------------------------------------- /labs/lab4/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import click 3 | import numpy as np 4 | import gym 5 | 6 | from simplepg.simple_utils import include_bias, weighted_sample 7 | 8 | 9 | def point_get_action(theta, ob, rng=np.random): 10 | ob_1 = include_bias(ob) 11 | mean = theta.dot(ob_1) 12 | return rng.normal(loc=mean, scale=1.) 
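# Shape check (informal): for Point-v0 below, obs_dim == 2, so include_bias(ob) has shape (3,),
# theta has shape (action_dim, obs_dim + 1) == (2, 3), and theta.dot(ob_1) is the 2-D mean of the
# unit-variance Gaussian from which the action is sampled.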
13 | 14 | 15 | def cartpole_get_action(theta, ob, rng=np.random): 16 | ob_1 = include_bias(ob) 17 | logits = ob_1.dot(theta.T) 18 | return weighted_sample(logits, rng=rng) 19 | 20 | 21 | @click.command() 22 | @click.argument("env_id", type=str, default="Point-v0") 23 | def main(env_id): 24 | # Register the environment 25 | rng = np.random.RandomState(42) 26 | 27 | if env_id == 'CartPole-v0': 28 | env = gym.make('CartPole-v0') 29 | get_action = cartpole_get_action 30 | obs_dim = env.observation_space.shape[0] 31 | action_dim = env.action_space.n 32 | elif env_id == 'Point-v0': 33 | from simplepg import point_env 34 | env = gym.make('Point-v0') 35 | get_action = point_get_action 36 | obs_dim = env.observation_space.shape[0] 37 | action_dim = env.action_space.shape[0] 38 | else: 39 | raise ValueError( 40 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 41 | 42 | env.seed(42) 43 | 44 | # Initialize parameters 45 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 46 | 47 | while True: 48 | ob = env.reset() 49 | done = False 50 | # Only render the first trajectory 51 | # Collect a new trajectory 52 | rewards = [] 53 | while not done: 54 | action = get_action(theta, ob, rng=rng) 55 | next_ob, rew, done, _ = env.step(action) 56 | ob = next_ob 57 | env.render() 58 | rewards.append(rew) 59 | 60 | print("Episode reward: %.2f" % np.sum(rewards)) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /labs/lab4/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.special 3 | import chainer 4 | 5 | 6 | # Compute gradient approximately using finite difference 7 | def numerical_grad(f, x, eps=1e-8): 8 | grad = np.zeros_like(x) 9 | for i in range(len(x)): 10 | xplus = np.array(x) 11 | xplus[i] += eps 12 | fplus = f(xplus) 13 | xminus = np.array(x) 14 | xminus[i] -= eps 15 | fminus = f(xminus) 16 | grad[i] = (fplus - fminus) / (2 * eps) 17 | return grad 18 | 19 | 20 | def gradient_check(f, g, x): 21 | # Test the implementation of g(x) = df/dx 22 | # Perform numerical differentiation and test it 23 | g_num = numerical_grad(f, x) 24 | g_test = g(x) 25 | try: 26 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 27 | print("Gradient check passed!") 28 | except AssertionError as e: 29 | print(e) 30 | print("Error: Gradient check didn't pass!") 31 | exit() 32 | 33 | 34 | def log_softmax(logits): 35 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 36 | 37 | 38 | def softmax(logits): 39 | x = logits 40 | x = x - np.max(x, axis=-1, keepdims=True) 41 | x = np.exp(x) 42 | return x / np.sum(x, axis=-1, keepdims=True) 43 | 44 | 45 | def weighted_sample(logits, rng=np.random): 46 | weights = softmax(logits) 47 | return min( 48 | int(np.sum(rng.uniform() > np.cumsum(weights))), 49 | len(weights) - 1 50 | ) 51 | 52 | 53 | def include_bias(x): 54 | # Add a constant term (1.0) to each entry in x 55 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 56 | 57 | 58 | _tested = set() 59 | _tests = dict() 60 | 61 | nprs = np.random.RandomState 62 | 63 | 64 | def register_test(fn_name, kwargs, desired_output=None): 65 | assert fn_name not in _tests 66 | _tests[fn_name] = (kwargs, desired_output) 67 | 68 | 69 | def assert_allclose(a, b): 70 | if isinstance(a, (np.ndarray, float, int)): 71 | np.testing.assert_allclose(a, b, rtol=1e-5) 72 | elif isinstance(a, (tuple, list)): 73 | 
assert isinstance(b, (tuple, list)) 74 | assert len(a) == len(b) 75 | for a_i, b_i in zip(a, b): 76 | assert_allclose(a_i, b_i) 77 | elif isinstance(a, chainer.Variable): 78 | assert isinstance(b, chainer.Variable) 79 | assert_allclose(a.data, b.data) 80 | else: 81 | raise NotImplementedError 82 | 83 | 84 | def test_once(fn): 85 | module = fn.__module__ 86 | name = fn.__name__ 87 | key = module + "." + name 88 | if key in _tested: 89 | return 90 | assert key in _tests, "Test for %s not found!" % key 91 | kwargs, desired_output = _tests[key] 92 | _tested.add(key) 93 | 94 | if callable(kwargs): 95 | kwargs = kwargs() 96 | 97 | if callable(desired_output): 98 | desired_output = desired_output() 99 | 100 | if desired_output is None: 101 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 102 | exit() 103 | else: 104 | try: 105 | output = fn(**kwargs) 106 | assert_allclose(desired_output, output) 107 | print("Test for %s passed!" % key) 108 | except AssertionError as e: 109 | print(e) 110 | print("Error: test for %s didn't pass!" % key) 111 | exit() 112 | -------------------------------------------------------------------------------- /labs/lab4/tests/__pycache__/a2c_tests.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/tests/__pycache__/a2c_tests.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/tests/__pycache__/pg_tests.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/tests/__pycache__/pg_tests.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/tests/__pycache__/simplepg_tests.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/tests/__pycache__/simplepg_tests.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/tests/__pycache__/trpo_tests.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/tests/__pycache__/trpo_tests.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/tests/a2c_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | from chainer import Variable 4 | 5 | register_test( 6 | "a2c.compute_returns_advantages", 7 | kwargs=lambda: dict( 8 | rewards=nprs(0).uniform(size=(5, 2)), 9 | dones=nprs(1).choice([True, False], size=(5, 2)), 10 | values=nprs(2).uniform(size=(5, 2)), 11 | next_values=nprs(3).uniform(size=(2,)), 12 | discount=0.99, 13 | ), 14 | desired_output=lambda: ( 15 | np.array([[1.14554925, 1.25462372], 16 | [0.60276338, 0.54488318], 17 | [2.33579066, 1.90456042], 18 | [1.93145037, 1.2713801], 19 | [1.50895268, 0.38344152]]), 20 | np.array([[0.70955434, 1.22869749], 21 | [0.0531009, 0.10956079], 22 | [1.91542286, 1.5742256], 23 | [1.72680173, 0.65210914], 24 | [1.20929801, 0.11661424]]) 25 | ) 26 | ) 27 | 28 | 
register_test( 29 | "a2c.compute_total_loss", 30 | kwargs=lambda: dict( 31 | logli=Variable(nprs(0).uniform(size=(10,)).astype(np.float32)), 32 | all_advs=Variable(nprs(1).uniform(size=(10,)).astype(np.float32)), 33 | ent_coeff=nprs(2).uniform(), 34 | ent=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 35 | vf_loss_coeff=nprs(4).uniform(), 36 | all_returns=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 37 | all_values=Variable(nprs(6).uniform(size=(10,)).astype(np.float32)), 38 | ), 39 | desired_output=lambda: ( 40 | Variable(np.array(-0.4047563076019287, dtype=np.float32)), 41 | Variable(np.array(0.22883716225624084, dtype=np.float32)), 42 | Variable(np.array(-0.1834639459848404, dtype=np.float32)) 43 | ) 44 | ) 45 | -------------------------------------------------------------------------------- /labs/lab4/tests/pg_tests.py: -------------------------------------------------------------------------------- 1 | from chainer import Variable 2 | 3 | from simplepg.simple_utils import register_test, nprs 4 | from utils import Gaussian 5 | import numpy as np 6 | 7 | register_test( 8 | "pg.compute_surr_loss", 9 | kwargs=lambda: dict( 10 | dists=Gaussian( 11 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 12 | log_stds=Variable(nprs(1).uniform( 13 | size=(10, 3)).astype(np.float32)), 14 | ), 15 | all_acts=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 16 | all_advs=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 17 | ), 18 | desired_output=lambda: Variable( 19 | np.array(1.9201269149780273, dtype=np.float32)) 20 | ) 21 | -------------------------------------------------------------------------------- /labs/lab4/tests/simplepg_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | 4 | register_test( 5 | "__main__.compute_update", 6 | kwargs=lambda: dict( 7 | discount=0.99, 8 | R_tplus1=1.0, 9 | theta=nprs(0).uniform(size=(2, 2)), 10 | s_t=nprs(1).uniform(size=(1,)), 11 | a_t=nprs(2).choice(2), 12 | r_t=nprs(3).uniform(), 13 | b_t=nprs(4).uniform(), 14 | get_grad_logp_action=lambda theta, *_: theta * 2 15 | ), 16 | desired_output=lambda: ( 17 | 1.5407979025745755, 18 | np.array([[0.62978332, 0.82070564], [0.69169275, 0.62527314]]) 19 | ) 20 | ) 21 | 22 | register_test( 23 | "__main__.compute_baselines", 24 | kwargs=lambda: dict( 25 | all_returns=[ 26 | nprs(0).uniform(size=(10,)), 27 | nprs(1).uniform(size=(20,)), 28 | [], 29 | ], 30 | ), 31 | desired_output=lambda: np.array([0.61576628, 0.36728075, 0.]) 32 | ) 33 | 34 | register_test( 35 | "__main__.compute_fisher_matrix", 36 | kwargs=lambda: dict( 37 | theta=nprs(1).uniform(size=(2, 2)), 38 | get_grad_logp_action=lambda theta, ob, action: np.exp( 39 | theta) * np.linalg.norm(action), 40 | all_observations=list(nprs(2).uniform(size=(5, 1))), 41 | all_actions=list(nprs(3).choice(2, size=(5,))), 42 | ), 43 | desired_output=lambda: np.array([[0.92104469, 1.24739299, 0.60704379, 0.82124306], 44 | [1.24739299, 1.68937435, 45 | 0.82213401, 1.11222925], 46 | [0.60704379, 0.82213401, 47 | 0.40009151, 0.54126635], 48 | [0.82124306, 1.11222925, 0.54126635, 0.73225564]]) 49 | ) 50 | 51 | register_test( 52 | "__main__.compute_natural_gradient", 53 | kwargs=lambda: dict( 54 | F=nprs(0).uniform(size=(4, 4)), 55 | grad=nprs(1).uniform(size=(2, 2)), 56 | reg=1e-3, 57 | ), 58 | desired_output=lambda: np.array( 59 | [[-0.44691565, 0.5477328], [-0.20366472, 0.72267091]]) 60 | ) 61 | 62 
| register_test( 63 | "__main__.compute_step_size", 64 | kwargs=lambda: dict( 65 | F=nprs(0).uniform(size=(2, 2)), 66 | natural_grad=nprs(1).uniform(size=(1, 2)), 67 | natural_step_size=1e-2, 68 | ), 69 | desired_output=lambda: 0.1607407366467048, 70 | ) 71 | -------------------------------------------------------------------------------- /labs/lab4/tests/trpo_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | from chainer import Variable 4 | 5 | from utils import Gaussian 6 | 7 | register_test( 8 | "trpo.compute_surr_loss", 9 | kwargs=lambda: dict( 10 | old_dists=Gaussian( 11 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 12 | log_stds=Variable(nprs(1).uniform( 13 | size=(10, 3)).astype(np.float32)), 14 | ), 15 | new_dists=Gaussian( 16 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 17 | log_stds=Variable(nprs(3).uniform( 18 | size=(10, 3)).astype(np.float32)), 19 | ), 20 | all_acts=Variable(nprs(4).uniform(size=(10, 3)).astype(np.float32)), 21 | all_advs=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 22 | ), 23 | desired_output=lambda: Variable( 24 | np.array(-0.5629823207855225, dtype=np.float32)) 25 | ) 26 | 27 | register_test( 28 | "trpo.compute_kl", 29 | kwargs=lambda: dict( 30 | old_dists=Gaussian( 31 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 32 | log_stds=Variable(nprs(1).uniform( 33 | size=(10, 3)).astype(np.float32)), 34 | ), 35 | new_dists=Gaussian( 36 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 37 | log_stds=Variable(nprs(3).uniform( 38 | size=(10, 3)).astype(np.float32)), 39 | ), 40 | ), 41 | desired_output=lambda: Variable( 42 | np.array(0.5306503176689148, dtype=np.float32)) 43 | ) 44 | -------------------------------------------------------------------------------- /labs/lab4/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/viskit/__init__.py -------------------------------------------------------------------------------- /labs/lab4/viskit/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/viskit/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/viskit/__pycache__/core.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/viskit/__pycache__/core.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/viskit/core.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import json 4 | import os 5 | 6 | import numpy as np 7 | 8 | 9 | # from sandbox.rocky.utils.py_utils import AttrDict 10 | 11 | class AttrDict(dict): 12 | def __init__(self, *args, **kwargs): 13 | super(AttrDict, self).__init__(*args, **kwargs) 14 | self.__dict__ = self 15 | 16 | 17 | def unique(l): 18 | return list(set(l)) 19 | 20 | 21 | def flatten(l): 22 | return [item for sublist in l for item in sublist] 23 | 24 | 25 
| def load_progress(progress_json_path, verbose=True):
26 |     if verbose:
27 |         print("Reading %s" % progress_json_path)
28 |     entries = dict()
29 |     rows = []
30 |     with open(progress_json_path, 'r') as f:
31 |         lines = f.read().split('\n')
32 |         for line in lines:
33 |             if len(line) > 0:
34 |                 row = json.loads(line)
35 |                 rows.append(row)
36 |     all_keys = set(k for row in rows for k in row.keys())
37 |     for k in all_keys:
38 |         if k not in entries:
39 |             entries[k] = []
40 |         for row in rows:
41 |             if k in row:
42 |                 v = row[k]
43 |                 try:
44 |                     entries[k].append(float(v))
45 |                 except (TypeError, ValueError):
46 |                     entries[k].append(np.nan)
47 |             else:
48 |                 entries[k].append(np.nan)
49 | 
63 |     entries = dict([(k, np.array(v)) for k, v in entries.items()])
64 |     return entries
65 | 
66 | 
67 | def flatten_dict(d):
68 |     flat_params = dict()
69 |     for k, v in d.items():
70 |         if isinstance(v, dict):
71 |             v = flatten_dict(v)
72 |             for subk, subv in v.items():
73 |                 flat_params[k + "." + subk] = subv
74 |         else:
75 |             flat_params[k] = v
76 |     return flat_params
77 | 
78 | 
79 | def load_params(params_json_path):
80 |     with open(params_json_path, 'r') as f:
81 |         data = json.loads(f.read())
82 |         if "args_data" in data:
83 |             del data["args_data"]
84 |         if "exp_name" not in data:
85 |             data["exp_name"] = params_json_path.split("/")[-2]
86 |     return data
87 | 
88 | 
89 | def lookup(d, keys):
90 |     if not isinstance(keys, list):
91 |         keys = keys.split(".")
92 |     for k in keys:
93 |         if hasattr(d, "__getitem__"):
94 |             if k in d:
95 |                 d = d[k]
96 |             else:
97 |                 return None
98 |         else:
99 |             return None
100 |     return d
101 | 
102 | 
103 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True):
104 |     if isinstance(exp_folder_paths, str):
105 |         exp_folder_paths = [exp_folder_paths]
106 |     exps = []
107 |     for exp_folder_path in exp_folder_paths:
108 |         exps += [x[0] for x in os.walk(exp_folder_path)]
109 |     if verbose:
110 |         print("finished walking exp folders")
111 |     exps_data = []
112 |     for exp in exps:
113 |         try:
114 |             exp_path = exp
115 |             variant_json_path = os.path.join(exp_path, "variant.json")
116 |             progress_json_path = os.path.join(exp_path, "progress.json")
117 |             progress = load_progress(progress_json_path, verbose=verbose)
118 |             try:
119 |                 params = load_params(variant_json_path)
120 |             except IOError:
121 |                 params = dict(exp_name="experiment")
122 |             exps_data.append(AttrDict(
123 |                 progress=progress, params=params, flat_params=flatten_dict(params)))
124 |         except IOError as e:
125 |             if verbose:
126 |                 print(e)
127 | 
128 |     # a dictionary of all keys and types of values
129 |     all_keys = dict()
130 |     for data in exps_data:
131 |         for key in data.flat_params.keys():
132 |             if key not in all_keys:
133 |                 all_keys[key] = type(data.flat_params[key])
134 | 
135 |     # if an experiment is missing some key, fill it in with a default value (None)
136 |     if not ignore_missing_keys:
137 |         default_values = dict()
138 |         for data in exps_data:
139 |             for key in sorted(all_keys.keys()):
140 |                 if key not in data.flat_params:
141 |                     if key not in default_values:
142 |                         default = None
143 |                         default_values[key] = default
144 |                     data.flat_params[key] = default_values[key]
145 | 
146 |     return exps_data
147 | 
148 | 
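# A minimal usage sketch for the loaders above (the folder path and the
# "AverageReturn" key are assumptions; use whatever your runs actually logged):
#
#     exps = load_exps_data("data/local/trpo-cartpole")
#     for exp in exps:
#         print(exp.params["exp_name"], exp.progress["AverageReturn"][-1])
#
# Each entry in exp.progress is a numpy array (one element per logged
# iteration), and exp.flat_params holds the dotted, flattened variant keys.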
149 | def smart_repr(x): 150 | if isinstance(x, tuple): 151 | if len(x) == 0: 152 | return "tuple()" 153 | elif len(x) == 1: 154 | return "(%s,)" % smart_repr(x[0]) 155 | else: 156 | return "(" + ",".join(map(smart_repr, x)) + ")" 157 | else: 158 | if hasattr(x, "__call__"): 159 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." + x.__name__) 160 | else: 161 | return repr(x) 162 | 163 | 164 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 165 | try: 166 | stringified_pairs = sorted( 167 | map( 168 | eval, 169 | unique( 170 | flatten( 171 | [ 172 | list( 173 | map( 174 | smart_repr, 175 | list(d.flat_params.items()) 176 | ) 177 | ) 178 | for d in exps_data 179 | ] 180 | ) 181 | ) 182 | ), 183 | key=lambda x: ( 184 | tuple("" if it is None else str(it) for it in x), 185 | ) 186 | ) 187 | except Exception as e: 188 | print(e) 189 | import ipdb 190 | ipdb.set_trace() 191 | proposals = [(k, [x[1] for x in v]) 192 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 193 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 194 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 195 | return filtered 196 | 197 | 198 | class Selector(object): 199 | def __init__(self, exps_data, filters=None, custom_filters=None): 200 | self._exps_data = exps_data 201 | if filters is None: 202 | self._filters = tuple() 203 | else: 204 | self._filters = tuple(filters) 205 | if custom_filters is None: 206 | self._custom_filters = [] 207 | else: 208 | self._custom_filters = custom_filters 209 | 210 | def where(self, k, v): 211 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 212 | 213 | def custom_filter(self, filter): 214 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 215 | 216 | def _check_exp(self, exp): 217 | # or exp.flat_params.get(k, None) is None 218 | return all( 219 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 220 | k not in exp.flat_params)) for k, v in self._filters) 221 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 222 | 223 | def extract(self): 224 | return list(filter(self._check_exp, self._exps_data)) 225 | 226 | def iextract(self): 227 | return filter(self._check_exp, self._exps_data) 228 | 229 | 230 | # Taken from plot.ly 231 | color_defaults = [ 232 | '#1f77b4', # muted blue 233 | '#ff7f0e', # safety orange 234 | '#2ca02c', # cooked asparagus green 235 | '#d62728', # brick red 236 | '#9467bd', # muted purple 237 | '#8c564b', # chestnut brown 238 | '#e377c2', # raspberry yogurt pink 239 | '#7f7f7f', # middle gray 240 | '#bcbd22', # curry yellow-green 241 | '#17becf' # blue-teal 242 | ] 243 | 244 | 245 | def hex_to_rgb(hex, opacity=1.0): 246 | if hex[0] == '#': 247 | hex = hex[1:] 248 | assert (len(hex) == 6) 249 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 250 | -------------------------------------------------------------------------------- /labs/lab5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab5.pdf -------------------------------------------------------------------------------- /labs/lab5/alg_utils.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | 3 | 4 | # ============================== 5 | # Shared utilities 6 | # 
==============================
7 | 
8 | def compute_cumulative_returns(rewards, baselines, discount):
9 |     # This method builds up the cumulative sum of discounted rewards for each time step:
10 |     # R[t] = sum_{t'>=t} γ^(t'-t) * r_t'
11 |     # Note that we use γ^(t'-t) instead of γ^t'. This gives us a biased gradient but lower variance
12 |     returns = []
13 |     # Use the last baseline prediction to back up
14 |     cum_return = baselines[-1]
15 |     for reward in rewards[::-1]:
16 |         cum_return = cum_return * discount + reward
17 |         returns.append(cum_return)
18 |     return returns[::-1]
19 | 
20 | 
21 | def compute_advantages(rewards, baselines, discount, gae_lambda):
22 |     # Given rewards r_t and baselines b(s_t), compute the generalized advantage estimate A_t
23 |     deltas = rewards + discount * baselines[1:] - baselines[:-1]
24 |     advs = []
25 |     cum_adv = 0
26 |     multiplier = discount * gae_lambda
27 |     for delta in deltas[::-1]:
28 |         cum_adv = cum_adv * multiplier + delta
29 |         advs.append(cum_adv)
30 |     return advs[::-1]
31 | 
32 | 
33 | def compute_pg_vars(trajs, policy, baseline, discount, gae_lambda):
34 |     """
35 |     Compute chainer variables needed for various policy gradient algorithms
36 |     """
37 |     for traj in trajs:
38 |         # Include the last observation here, in case the trajectory is not finished
39 |         baselines = baseline.predict(np.concatenate(
40 |             [traj["observations"], [traj["last_observation"]]]))
41 |         if traj['finished']:
42 |             # If already finished, the future cumulative reward starting from the final state is 0
43 |             baselines[-1] = 0.
44 |         # This is useful when fitting baselines. It uses the baseline prediction of the last state value to perform
45 |         # a Bellman backup if the trajectory is not finished.
46 |         traj['returns'] = compute_cumulative_returns(
47 |             traj['rewards'], baselines, discount)
48 |         traj['advantages'] = compute_advantages(
49 |             traj['rewards'], baselines, discount, gae_lambda)
50 |         traj['baselines'] = baselines[:-1]
51 | 
52 |     # First, we compute a flattened list of observations, actions, and advantages
53 |     all_obs = np.concatenate([traj['observations'] for traj in trajs], axis=0)
54 |     all_acts = np.concatenate([traj['actions'] for traj in trajs], axis=0)
55 |     all_advs = np.concatenate([traj['advantages'] for traj in trajs], axis=0)
56 |     all_dists = {
57 |         k: np.concatenate([traj['distributions'][k] for traj in trajs], axis=0)
58 |         for k in trajs[0]['distributions'].keys()
59 |     }
60 | 
61 |     # Normalizing the advantage values can make the algorithm more robust to reward scaling
62 |     all_advs = (all_advs - np.mean(all_advs)) / (np.std(all_advs) + 1e-8)
63 | 
64 |     # Form chainer variables
65 |     all_obs = Variable(all_obs)
66 |     all_acts = Variable(all_acts)
67 |     all_advs = Variable(all_advs.astype(np.float32, copy=False))
68 |     all_dists = policy.distribution.from_dict(
69 |         {k: Variable(v) for k, v in all_dists.items()})
70 | 
71 |     return all_obs, all_acts, all_advs, all_dists
72 | 
73 | 
74 | # ==============================
75 | # Helper methods for logging
76 | # ==============================
77 | 
78 | def log_reward_statistics(env):
79 |     # keep unwrapping until we get the Monitor wrapper
80 |     while not isinstance(env, gym.wrappers.Monitor):
81 |         if not isinstance(env, gym.Wrapper):
82 |             assert False, "Monitor wrapper not found"
83 |         env = env.env
84 | 
85 |     assert isinstance(env, gym.wrappers.Monitor)
86 |     all_stats = None
87 |     for _ in range(10):
88 |         try:
89 |             all_stats = gym.wrappers.monitoring.load_results(env.directory)
90 |         except FileNotFoundError:
91 |             time.sleep(1)
92 |             continue
93 |         if all_stats is not None:
94 |             episode_rewards = all_stats['episode_rewards']
95 |             episode_lengths = all_stats['episode_lengths']
96 | 
97 |             recent_episode_rewards = episode_rewards[-100:]
98 |             recent_episode_lengths = episode_lengths[-100:]
99 | 
100 |             if len(recent_episode_rewards) > 0:
101 |                 logger.logkv('AverageReturn', np.mean(recent_episode_rewards))
102 |                 logger.logkv('MinReturn', np.min(recent_episode_rewards))
103 |                 logger.logkv('MaxReturn', np.max(recent_episode_rewards))
104 |                 logger.logkv('StdReturn', np.std(recent_episode_rewards))
105 |                 logger.logkv('AverageEpisodeLength',
106 |                              np.mean(recent_episode_lengths))
107 |                 logger.logkv('MinEpisodeLength', np.min(recent_episode_lengths))
108 |                 logger.logkv('MaxEpisodeLength', np.max(recent_episode_lengths))
109 |                 logger.logkv('StdEpisodeLength', np.std(recent_episode_lengths))
110 | 
111 |             logger.logkv('TotalNEpisodes', len(episode_rewards))
112 |             logger.logkv('TotalNSamples', np.sum(episode_lengths))
113 | 
114 | 
115 | def log_baseline_statistics(trajs):
116 |     # Compute the explained variance of the baseline, defined as 1 - Var[returns - baselines] / Var[returns]
117 |     baselines = np.concatenate([traj['baselines'] for traj in trajs])
118 |     returns = np.concatenate([traj['returns'] for traj in trajs])
119 |     logger.logkv('ExplainedVariance',
120 |                  explained_variance_1d(baselines, returns))
121 | 
122 | 
123 | def log_action_distribution_statistics(dists):
124 |     with chainer.no_backprop_mode():
125 |         entropy = F.mean(dists.entropy()).data
126 |         logger.logkv('Entropy', entropy)
127 |         logger.logkv('Perplexity', np.exp(entropy))
128 |         if isinstance(dists, Gaussian):
129 |             logger.logkv('AveragePolicyStd', F.mean(
130 |                 F.exp(dists.log_stds)).data)
131 |             for idx in range(dists.log_stds.shape[-1]):
132 |                 logger.logkv('AveragePolicyStd[{}]'.format(
133 |                     idx), F.mean(F.exp(dists.log_stds[..., idx])).data)
134 |         elif isinstance(dists, Categorical):
135 |             probs = F.mean(F.softmax(dists.logits), axis=0).data
136 |             for idx in range(len(probs)):
137 |                 logger.logkv('AveragePolicyProb[{}]'.format(idx), probs[idx])
138 | 
--------------------------------------------------------------------------------
/labs/lab5/algs.py:
--------------------------------------------------------------------------------
1 | from pg import pg
2 | from trpo import trpo
3 | from a2c import a2c
4 | 
--------------------------------------------------------------------------------
/labs/lab5/cloudexec.yml.template:
--------------------------------------------------------------------------------
1 | ## Attendee-specific
2 | 
3 | attendee_id: &attendee_id YOUR_ID_HERE
4 | ec2_instance_label: *attendee_id
5 | s3_bucket_root: *attendee_id
6 | 
7 | aws_access_key: YOUR_ACCESS_KEY_ID_HERE
8 | aws_access_secret: YOUR_SECRET_ACCESS_KEY_HERE
9 | 
10 | # TODO run `python scripts/generate_key_pairs.py`, and fill this in!
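# For example, with attendee_id "jane_doe" and the default region list below,
# the script would print an entry like the following (hypothetical value;
# copy whatever the script actually outputs):
#
#   aws_key_pairs:
#     us-east-1: jane_doe_us-east-1
#
# The names follow the "{attendee_id}_{region}" pattern used by
# scripts/generate_key_pairs.py.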
11 | 12 | aws_key_pairs: 13 | us-east-1: YOUR_KEY_PAIR_NAME 14 | 15 | ## Bootcamp-specific 16 | docker_image: dementrock/deeprlbootcamp 17 | 18 | # Since we are using a public docker image, no need to log in 19 | docker_username: ~ 20 | docker_password: ~ 21 | docker_host: ~ 22 | 23 | s3_bucket: deeprlbootcamp 24 | aws_s3_region: us-east-1 25 | 26 | # Instance configuration 27 | aws_instance_type: c4.large 28 | aws_use_spot_instances: true 29 | aws_spot_price: 0.1 30 | 31 | aws_iam_instance_profile: attendee_instance_profile 32 | 33 | aws_image_id: 34 | ap-northeast-1: ami-a3c737c5 35 | ap-northeast-2: ami-8faa72e1 36 | ap-south-1: ami-65094c0a 37 | ap-southeast-1: ami-c6bfdba5 38 | ap-southeast-2: ami-597b603a 39 | ca-central-1: ami-60982604 40 | eu-central-1: ami-06933b69 41 | eu-west-1: ami-ba08f5c3 42 | eu-west-2: ami-13524277 43 | sa-east-1: ami-8be293e7 44 | us-east-1: ami-b0c2fecb 45 | us-east-2: ami-6f43600a 46 | us-west-1: ami-1ecce67e 47 | us-west-2: ami-999e72e1 48 | 49 | aws_regions: 50 | - us-east-1 51 | 52 | aws_security_groups: 53 | us-east-1: sg-55afed24 54 | 55 | aws_subnets: 56 | us-east-1: 57 | us-east-1a: subnet-55155d1d 58 | us-east-1b: subnet-5953da03 59 | us-east-1c: subnet-82fe18e6 60 | us-east-1d: subnet-7db42551 61 | us-east-1e: subnet-4b686877 62 | us-east-1f: subnet-2076172c 63 | 64 | ## Other default settings 65 | 66 | ec2_project_root: /home/ubuntu/code 67 | ec2_user: ubuntu 68 | 69 | ec2_terminate_machine: true 70 | 71 | s3_code_sync_ignores: 72 | - data/local 73 | - data/s3 74 | - build_lab3 75 | - build_lab4 76 | - build_lab5 77 | - bullet3 78 | - roboschool 79 | - private 80 | - pong_warm_start.pkl 81 | - dqn/replay_buffer.pkl 82 | - dqn/weights.pkl 83 | - Lab-Policy-Gradient-Algorithms 84 | - Lab-RL-in-the-Cloud 85 | - .git 86 | - .gitignore 87 | - .pods 88 | - .DS_Store 89 | - .idea 90 | - cloudexec.yml 91 | - __pycache__ 92 | 93 | s3_periodic_sync_interval: 15 94 | 95 | s3_periodic_sync_include_flags: "--include *progress.json --include *variant.json" 96 | 97 | debug: false 98 | -------------------------------------------------------------------------------- /labs/lab5/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 
17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /labs/lab5/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /labs/lab5/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_a2c_breakout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import a2c 3 | from env_makers import EnvMaker 4 | from models import CategoricalCNNPolicy 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/a2c-breakout" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('BreakoutNoFrameskip-v4') 19 | env = env_maker.make() 20 | policy = CategoricalCNNPolicy( 21 | env.observation_space, env.action_space, env.spec) 22 | vf = policy.create_vf() 23 | a2c( 24 | env=env, 25 | env_maker=env_maker, 26 | n_envs=16, 27 | policy=policy, 28 | vf=vf, 29 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 30 | ) 31 | -------------------------------------------------------------------------------- 
/labs/lab5/experiments/run_a2c_pong.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import a2c 3 | from env_makers import EnvMaker 4 | from models import CategoricalCNNPolicy 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/a2c-pong" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('PongNoFrameskip-v4') 19 | env = env_maker.make() 20 | policy = CategoricalCNNPolicy( 21 | env.observation_space, env.action_space, env.spec) 22 | vf = policy.create_vf() 23 | a2c( 24 | env=env, 25 | env_maker=env_maker, 26 | n_envs=16, 27 | policy=policy, 28 | vf=vf, 29 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 30 | ) 31 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_cloud_trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cloudexec 4 | import numpy as np 5 | from env_makers import EnvMaker 6 | from models import MLPBaseline, CategoricalMLPPolicy 7 | from algs import trpo 8 | from utils import SnapshotSaver 9 | import logger 10 | 11 | 12 | def run(v): 13 | np.random.seed(v['seed']) 14 | env_maker = EnvMaker('CartPole-v0') 15 | env = env_maker.make() 16 | policy = CategoricalMLPPolicy( 17 | observation_space=env.observation_space, 18 | action_space=env.action_space, 19 | env_spec=env.spec 20 | ) 21 | baseline = MLPBaseline( 22 | observation_space=env.observation_space, 23 | action_space=env.action_space, 24 | env_spec=env.spec 25 | ) 26 | trpo( 27 | env=env, 28 | env_maker=env_maker, 29 | n_envs=16, 30 | policy=policy, 31 | baseline=baseline, 32 | batch_size=2000, 33 | n_iters=100, 34 | snapshot_saver=SnapshotSaver(logger.get_dir()) 35 | ) 36 | 37 | 38 | cloudexec.remote_call( 39 | task=cloudexec.Task( 40 | run, 41 | variant=dict(seed=0), 42 | ), 43 | config=cloudexec.Config( 44 | exp_group="trpo-cartpole", 45 | ), 46 | mode=cloudexec.local_mode, 47 | ) 48 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_cloud_trpo_pendulum_baseline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cloudexec 4 | from cloudexec import VariantGenerator 5 | import numpy as np 6 | from env_makers import EnvMaker 7 | from models import MLPBaseline, TimeDependentBaseline, LinearFeatureBaseline 8 | from models import GaussianMLPPolicy 9 | from algs import trpo 10 | from utils import SnapshotSaver 11 | import chainer 12 | import logger 13 | 14 | 15 | def run(v): 16 | np.random.seed(v['seed']) 17 | env_maker = EnvMaker('Pendulum-v0') 18 | env = env_maker.make() 19 | policy = GaussianMLPPolicy( 20 | observation_space=env.observation_space, 21 | action_space=env.action_space, 22 | env_spec=env.spec, 23 | hidden_sizes=(64, 64), 24 | hidden_nonlinearity=chainer.functions.tanh, 25 | ) 26 | if v['baseline'] == 'mlp': 27 | baseline = MLPBaseline( 28 | observation_space=env.observation_space, 29 | action_space=env.action_space, 30 | env_spec=env.spec, 31 | hidden_sizes=(64, 64), 32 | hidden_nonlinearity=chainer.functions.tanh, 33 | ) 34 | elif v['baseline'] == 'time_dependent': 35 | baseline = TimeDependentBaseline( 36 | observation_space=env.observation_space, 37 | 
action_space=env.action_space, 38 | env_spec=env.spec, 39 | ) 40 | elif v['baseline'] == 'linear_feature': 41 | baseline = LinearFeatureBaseline( 42 | observation_space=env.observation_space, 43 | action_space=env.action_space, 44 | env_spec=env.spec, 45 | ) 46 | else: 47 | raise ValueError 48 | trpo( 49 | env=env, 50 | env_maker=env_maker, 51 | n_envs=16, 52 | policy=policy, 53 | baseline=baseline, 54 | batch_size=10000, 55 | n_iters=100, 56 | snapshot_saver=SnapshotSaver(logger.get_dir()), 57 | ) 58 | 59 | 60 | vg = VariantGenerator() 61 | vg.add("seed", [0, 100, 200]) 62 | vg.add("baseline", ['mlp', 'linear_feature', 'time_dependent']) 63 | 64 | for variant in vg.variants(): 65 | cloudexec.remote_call( 66 | task=cloudexec.Task( 67 | run, 68 | variant=variant, 69 | ), 70 | config=cloudexec.Config( 71 | exp_group="trpo-pendulum-baseline", 72 | ), 73 | mode=cloudexec.local_mode, 74 | ) 75 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_pg_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import chainer 3 | 4 | from algs import pg 5 | from env_makers import EnvMaker 6 | from models import CategoricalMLPPolicy, MLPBaseline 7 | from utils import SnapshotSaver 8 | import numpy as np 9 | import os 10 | import logger 11 | 12 | log_dir = "data/local/pg-cartpole" 13 | 14 | np.random.seed(42) 15 | 16 | # Clean up existing logs 17 | os.system("rm -rf {}".format(log_dir)) 18 | 19 | with logger.session(log_dir): 20 | env_maker = EnvMaker('CartPole-v0') 21 | env = env_maker.make() 22 | policy = CategoricalMLPPolicy(observation_space=env.observation_space, action_space=env.action_space, 23 | env_spec=env.spec) 24 | baseline = MLPBaseline(observation_space=env.observation_space, action_space=env.action_space, 25 | env_spec=env.spec) 26 | pg( 27 | env=env, 28 | env_maker=env_maker, 29 | n_envs=16, 30 | policy=policy, 31 | baseline=baseline, 32 | batch_size=2000, 33 | n_iters=100, 34 | snapshot_saver=SnapshotSaver(log_dir), 35 | optimizer=chainer.optimizers.Adam(1e-2) 36 | ) 37 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import trpo 3 | from env_makers import EnvMaker 4 | from models import CategoricalMLPPolicy, MLPBaseline 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/trpo-cartpole" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('CartPole-v0') 19 | env = env_maker.make() 20 | policy = CategoricalMLPPolicy( 21 | observation_space=env.observation_space, 22 | action_space=env.action_space, 23 | env_spec=env.spec 24 | ) 25 | baseline = MLPBaseline( 26 | observation_space=env.observation_space, 27 | action_space=env.action_space, 28 | env_spec=env.spec 29 | ) 30 | trpo( 31 | env=env, 32 | env_maker=env_maker, 33 | n_envs=16, 34 | policy=policy, 35 | baseline=baseline, 36 | batch_size=2000, 37 | n_iters=100, 38 | snapshot_saver=SnapshotSaver(log_dir) 39 | ) 40 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_trpo_half_cheetah.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python
2 | import chainer
3 | 
4 | from algs import trpo
5 | from env_makers import EnvMaker
6 | from models import GaussianMLPPolicy, MLPBaseline
7 | from utils import SnapshotSaver
8 | import numpy as np
9 | import os
10 | import logger
11 | 
12 | log_dir = "data/local/trpo-half-cheetah"
13 | 
14 | np.random.seed(42)
15 | 
16 | # Clean up existing logs
17 | os.system("rm -rf {}".format(log_dir))
18 | 
19 | with logger.session(log_dir):
20 |     env_maker = EnvMaker('RoboschoolHalfCheetah-v1')
21 |     env = env_maker.make()
22 |     policy = GaussianMLPPolicy(
23 |         observation_space=env.observation_space,
24 |         action_space=env.action_space,
25 |         env_spec=env.spec,
26 |         hidden_sizes=(256, 64),
27 |         hidden_nonlinearity=chainer.functions.tanh,
28 |     )
29 |     baseline = MLPBaseline(
30 |         observation_space=env.observation_space,
31 |         action_space=env.action_space,
32 |         env_spec=env.spec,
33 |         hidden_sizes=(256, 64),
34 |         hidden_nonlinearity=chainer.functions.tanh,
35 |     )
36 |     trpo(
37 |         env=env,
38 |         env_maker=env_maker,
39 |         n_envs=16,
40 |         policy=policy,
41 |         baseline=baseline,
42 |         batch_size=5000,
43 |         n_iters=5000,
44 |         snapshot_saver=SnapshotSaver(log_dir, interval=10),
45 |     )
46 | 
--------------------------------------------------------------------------------
/labs/lab5/experiments/run_trpo_pendulum.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import chainer
3 | 
4 | from algs import trpo
5 | from env_makers import EnvMaker
6 | from models import GaussianMLPPolicy, MLPBaseline
7 | from utils import SnapshotSaver
8 | import numpy as np
9 | import os
10 | import logger
11 | 
12 | log_dir = "data/local/trpo-pendulum"
13 | 
14 | np.random.seed(42)
15 | 
16 | # Clean up existing logs
17 | os.system("rm -rf {}".format(log_dir))
18 | 
19 | with logger.session(log_dir):
20 |     env_maker = EnvMaker('Pendulum-v0')
21 |     env = env_maker.make()
22 |     policy = GaussianMLPPolicy(
23 |         observation_space=env.observation_space,
24 |         action_space=env.action_space,
25 |         env_spec=env.spec,
26 |         hidden_sizes=(64, 64),
27 |         hidden_nonlinearity=chainer.functions.tanh,
28 |     )
29 |     baseline = MLPBaseline(
30 |         observation_space=env.observation_space,
31 |         action_space=env.action_space,
32 |         env_spec=env.spec,
33 |         hidden_sizes=(64, 64),
34 |         hidden_nonlinearity=chainer.functions.tanh,
35 |     )
36 |     trpo(
37 |         env=env,
38 |         env_maker=env_maker,
39 |         n_envs=16,
40 |         policy=policy,
41 |         baseline=baseline,
42 |         batch_size=10000,
43 |         n_iters=100,
44 |         snapshot_saver=SnapshotSaver(log_dir),
45 |     )
46 | 
--------------------------------------------------------------------------------
/labs/lab5/findport.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Usage: findport.py 3000 100
4 | #
5 | from __future__ import print_function
6 | import socket
7 | from contextlib import closing
8 | import sys
9 | 
10 | if len(sys.argv) != 3:
11 |     print("Usage: {} <base> <increment>".format(sys.argv[0]))
12 |     sys.exit(1)
13 | 
14 | base = int(sys.argv[1])
15 | increment = int(sys.argv[2])
16 | 
17 | 
18 | def find_free_port():
19 |     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
20 |         for port in range(base, 65536, increment):
21 |             try:
22 |                 s.bind(('', port))
23 |                 return s.getsockname()[1]
24 |             except socket.error:
25 |                 continue
26 | 
27 | 
28 | print(find_free_port())
29 | 
--------------------------------------------------------------------------------
/labs/lab5/launch_bg_screen_buffer.sh:
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /labs/lab5/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | See README.md for a description of the logging API. 4 | 5 | OFF state corresponds to having Logger.CURRENT == Logger.DEFAULT 6 | ON state is otherwise 7 | 8 | """ 9 | import datetime 10 | from collections import OrderedDict 11 | import os 12 | import sys 13 | import shutil 14 | import os.path as osp 15 | import json 16 | 17 | import dateutil.tz 18 | 19 | LOG_OUTPUT_FORMATS = ['stdout', 'log', 'json'] 20 | 21 | DEBUG = 10 22 | INFO = 20 23 | WARN = 30 24 | ERROR = 40 25 | 26 | DISABLED = 50 27 | 28 | 29 | class OutputFormat(object): 30 | def writekvs(self, kvs): 31 | """ 32 | Write key-value pairs 33 | """ 34 | raise NotImplementedError 35 | 36 | def writeseq(self, args): 37 | """ 38 | Write a sequence of other data (e.g. a logging message) 39 | """ 40 | pass 41 | 42 | def close(self): 43 | return 44 | 45 | 46 | class HumanOutputFormat(OutputFormat): 47 | def __init__(self, file): 48 | self.file = file 49 | 50 | def writekvs(self, kvs): 51 | # Create strings for printing 52 | key2str = OrderedDict() 53 | for (key, val) in kvs.items(): 54 | valstr = '%-8.5g' % (val,) if hasattr(val, '__float__') else val 55 | key2str[self._truncate(key)] = self._truncate(valstr) 56 | 57 | # Find max widths 58 | keywidth = max(map(len, key2str.keys())) 59 | valwidth = max(map(len, key2str.values())) 60 | 61 | # Write out the data 62 | dashes = '-' * (keywidth + valwidth + 7) 63 | lines = [dashes] 64 | for (key, val) in key2str.items(): 65 | lines.append('| %s%s | %s%s |' % ( 66 | key, 67 | ' ' * (keywidth - len(key)), 68 | val, 69 | ' ' * (valwidth - len(val)), 70 | )) 71 | lines.append(dashes) 72 | self.file.write('\n'.join(lines) + '\n') 73 | 74 | # Flush the output to the file 75 | self.file.flush() 76 | 77 | def _truncate(self, s): 78 | return s[:20] + '...' 
if len(s) > 23 else s 79 | 80 | def writeseq(self, args): 81 | for arg in args: 82 | self.file.write(arg) 83 | self.file.write('\n') 84 | self.file.flush() 85 | 86 | 87 | class JSONOutputFormat(OutputFormat): 88 | def __init__(self, file): 89 | self.file = file 90 | 91 | def writekvs(self, kvs): 92 | for k, v in kvs.items(): 93 | if hasattr(v, 'dtype'): 94 | v = v.tolist() 95 | kvs[k] = float(v) 96 | self.file.write(json.dumps(kvs) + '\n') 97 | self.file.flush() 98 | 99 | 100 | def make_output_format(format, ev_dir): 101 | os.makedirs(ev_dir, exist_ok=True) 102 | if format == 'stdout': 103 | return HumanOutputFormat(sys.stdout) 104 | elif format == 'log': 105 | log_file = open(osp.join(ev_dir, 'log.txt'), 'at') 106 | return HumanOutputFormat(log_file) 107 | elif format == 'json': 108 | json_file = open(osp.join(ev_dir, 'progress.json'), 'at') 109 | return JSONOutputFormat(json_file) 110 | else: 111 | raise ValueError('Unknown format specified: %s' % (format,)) 112 | 113 | 114 | # ================================================================ 115 | # API 116 | # ================================================================ 117 | 118 | 119 | def logkv(key, val): 120 | """ 121 | Log a value of some diagnostic 122 | Call this once for each diagnostic quantity, each iteration 123 | """ 124 | Logger.CURRENT.logkv(key, val) 125 | 126 | 127 | def dumpkvs(): 128 | """ 129 | Write all of the diagnostics from the current iteration 130 | 131 | level: int. (see old_logger.py docs) If the global logger level is higher than 132 | the level argument here, don't print to stdout. 133 | """ 134 | Logger.CURRENT.dumpkvs() 135 | 136 | 137 | # for backwards compatibility 138 | record_tabular = logkv 139 | dump_tabular = dumpkvs 140 | 141 | 142 | def log(*args, level=INFO): 143 | """ 144 | Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). 145 | """ 146 | Logger.CURRENT.log(*args, level=level) 147 | 148 | 149 | def debug(*args): 150 | log(*args, level=DEBUG) 151 | 152 | 153 | def info(*args): 154 | log(*args, level=INFO) 155 | 156 | 157 | def warn(*args): 158 | log(*args, level=WARN) 159 | 160 | 161 | def error(*args): 162 | log(*args, level=ERROR) 163 | 164 | 165 | def set_level(level): 166 | """ 167 | Set logging threshold on current logger. 168 | """ 169 | Logger.CURRENT.set_level(level) 170 | 171 | 172 | def get_level(): 173 | """ 174 | Set logging threshold on current logger. 175 | """ 176 | return Logger.CURRENT.level 177 | 178 | 179 | def get_dir(): 180 | """ 181 | Get directory that log files are being written to. 182 | will be None if there is no output directory (i.e., if you didn't call start) 183 | """ 184 | return Logger.CURRENT.get_dir() 185 | 186 | 187 | def get_expt_dir(): 188 | sys.stderr.write( 189 | "get_expt_dir() is Deprecated. Switch to get_dir() [%s]\n" % (get_dir(),)) 190 | return get_dir() 191 | 192 | 193 | # ================================================================ 194 | # Backend 195 | # ================================================================ 196 | 197 | 198 | class Logger(object): 199 | # A logger with no output files. 
(See right below class definition) 200 | DEFAULT = None 201 | # So that you can still log to the terminal without setting up any output files 202 | CURRENT = None # Current logger being used by the free functions above 203 | 204 | def __init__(self, dir, output_formats): 205 | self.name2val = OrderedDict() # values this iteration 206 | self.level = INFO 207 | self.dir = dir 208 | self.output_formats = output_formats 209 | 210 | # Logging API, forwarded 211 | # ---------------------------------------- 212 | def logkv(self, key, val): 213 | self.name2val[key] = val 214 | 215 | def dumpkvs(self): 216 | for fmt in self.output_formats: 217 | fmt.writekvs(self.name2val) 218 | self.name2val.clear() 219 | 220 | def log(self, *args, level=INFO): 221 | now = datetime.datetime.now(dateutil.tz.tzlocal()) 222 | timestamp = now.strftime('[%Y-%m-%d %H:%M:%S.%f %Z] ') 223 | if self.level <= level: 224 | self._do_log((timestamp,) + args) 225 | 226 | # Configuration 227 | # ---------------------------------------- 228 | def set_level(self, level): 229 | self.level = level 230 | 231 | def get_dir(self): 232 | return self.dir 233 | 234 | def close(self): 235 | for fmt in self.output_formats: 236 | fmt.close() 237 | 238 | # Misc 239 | # ---------------------------------------- 240 | def _do_log(self, args): 241 | for fmt in self.output_formats: 242 | fmt.writeseq(args) 243 | 244 | 245 | # ================================================================ 246 | 247 | Logger.DEFAULT = Logger( 248 | output_formats=[HumanOutputFormat(sys.stdout)], dir=None) 249 | Logger.CURRENT = Logger.DEFAULT 250 | 251 | 252 | class session(object): 253 | """ 254 | Context manager that sets up the loggers for an experiment. 255 | """ 256 | 257 | CURRENT = None # Set to a LoggerContext object using enter/exit or context manager 258 | 259 | def __init__(self, dir, format_strs=None): 260 | self.dir = dir 261 | if format_strs is None: 262 | format_strs = LOG_OUTPUT_FORMATS 263 | output_formats = [make_output_format(f, dir) for f in format_strs] 264 | Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) 265 | 266 | def __enter__(self): 267 | os.makedirs(self.evaluation_dir(), exist_ok=True) 268 | output_formats = [make_output_format( 269 | f, self.evaluation_dir()) for f in LOG_OUTPUT_FORMATS] 270 | Logger.CURRENT = Logger(dir=self.dir, output_formats=output_formats) 271 | 272 | def __exit__(self, *args): 273 | Logger.CURRENT.close() 274 | Logger.CURRENT = Logger.DEFAULT 275 | 276 | def evaluation_dir(self): 277 | return self.dir 278 | 279 | 280 | # ================================================================ 281 | 282 | 283 | def _demo(): 284 | info("hi") 285 | debug("shouldn't appear") 286 | set_level(DEBUG) 287 | debug("should appear") 288 | dir = "/tmp/testlogging" 289 | if os.path.exists(dir): 290 | shutil.rmtree(dir) 291 | with session(dir=dir): 292 | record_tabular("a", 3) 293 | record_tabular("b", 2.5) 294 | dump_tabular() 295 | record_tabular("b", -2.5) 296 | record_tabular("a", 5.5) 297 | dump_tabular() 298 | info("^^^ should see a = 5.5") 299 | 300 | record_tabular("b", -2.5) 301 | dump_tabular() 302 | 303 | record_tabular("a", "longasslongasslongasslongasslongasslongassvalue") 304 | dump_tabular() 305 | 306 | 307 | if __name__ == "__main__": 308 | _demo() 309 | -------------------------------------------------------------------------------- /labs/lab5/pg.py: -------------------------------------------------------------------------------- 1 | from alg_utils import * 2 | from simplepg.simple_utils import 
test_once, nprs
3 | import tests.pg_tests
4 | 
5 | 
6 | def pg(env, env_maker, policy, baseline, n_envs=mp.cpu_count(), last_iter=-1, n_iters=100, batch_size=1000,
7 |        optimizer=chainer.optimizers.Adam(), discount=0.99, gae_lambda=0.97, snapshot_saver=None):
8 |     """
9 |     This method implements the vanilla policy gradient algorithm.
10 |     :param env: An environment instance, which should have the same class as what env_maker.make() returns.
11 |     :param env_maker: An object such that calling env_maker.make() will generate a new environment.
12 |     :param policy: A stochastic policy which we will be optimizing.
13 |     :param baseline: A baseline used for variance reduction and estimating future returns for unfinished trajectories.
14 |     :param n_envs: Number of environments running simultaneously.
15 |     :param last_iter: The index of the last iteration. This is normally -1 when starting afresh, but may be different when
16 |         loaded from a snapshot.
17 |     :param n_iters: The total number of iterations to run.
18 |     :param batch_size: The number of samples used per iteration.
19 |     :param optimizer: A Chainer optimizer instance. By default we use the Adam algorithm with learning rate 1e-3.
20 |     :param discount: Discount factor.
21 |     :param gae_lambda: Lambda parameter used for generalized advantage estimation.
22 |     :param snapshot_saver: An object for saving snapshots.
23 |     """
24 | 
25 |     if getattr(optimizer, 'target', None) is not policy:
26 |         optimizer.setup(policy)
27 | 
28 |     logger.info("Starting env pool")
29 |     with EnvPool(env_maker, n_envs=n_envs) as env_pool:
30 |         for iter in range(last_iter + 1, n_iters):
31 |             logger.info("Starting iteration {}".format(iter))
32 |             logger.logkv('Iteration', iter)
33 | 
34 |             logger.info("Start collecting samples")
35 |             trajs = parallel_collect_samples(env_pool, policy, batch_size)
36 | 
37 |             logger.info("Computing input variables for policy optimization")
38 |             all_obs, all_acts, all_advs, _ = compute_pg_vars(
39 |                 trajs, policy, baseline, discount, gae_lambda
40 |             )
41 | 
42 |             # Begin policy update
43 | 
44 |             # Now, you need to implement the computation of the policy gradient
45 |             # The policy gradient is given by -1/T \sum_t \nabla_\theta(log(p_\theta(a_t|s_t))) * A_t
46 |             # Note the negative sign in front, since optimizers are most often minimizing a loss rather than maximizing an objective
47 |             # This is the same as \nabla_\theta(-1/T \sum_t log(p_\theta(a_t|s_t)) * A_t) = \nabla_\theta(L), where L is the surrogate loss term
48 | 
49 |             logger.info("Computing policy gradient")
50 | 
51 |             # Methods that may be useful:
52 |             # - `dists.logli(actions)' returns the log probability of the actions under the distribution `dists'.
53 |             #   This method returns a chainer variable.
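            # As a quick sanity check (a sketch mirroring tests/pg_tests.py, which
            # registers this exact setup), compute_surr_loss can be evaluated on
            # synthetic inputs and should return a scalar chainer Variable:
            #
            #     from utils import Gaussian
            #     dists = Gaussian(
            #         means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)),
            #         log_stds=Variable(nprs(1).uniform(size=(10, 3)).astype(np.float32)))
            #     acts = Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32))
            #     advs = Variable(nprs(3).uniform(size=(10,)).astype(np.float32))
            #     compute_surr_loss(dists, acts, advs)  # -> scalar Variable
            #
            # test_once below runs the registered version of this check automatically.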
54 | 
55 |             dists = policy.compute_dists(all_obs)
56 | 
57 |             def compute_surr_loss(dists, all_acts, all_advs):
58 |                 """
59 |                 :param dists: An instance of subclass of Distribution
60 |                 :param all_acts: A chainer variable, which should be a matrix of size N * |A|
61 |                 :param all_advs: A chainer variable, which should be a vector of size N
62 |                 :return: A chainer variable, which should be a scalar
63 |                 """
65 |                 logli = dists.logli(all_acts)
66 |                 surr_loss = -F.mean(logli * all_advs)
68 |                 return surr_loss
69 | 
70 |             test_once(compute_surr_loss)
71 | 
72 |             surr_loss = compute_surr_loss(dists, all_acts, all_advs)
73 | 
74 |             # reset gradients stored in the policy parameters
75 |             policy.cleargrads()
76 |             surr_loss.backward()
77 | 
78 |             # apply the computed gradient
79 |             optimizer.update()
80 | 
81 |             # Update baseline
82 |             logger.info("Updating baseline")
83 |             baseline.update(trajs)
84 | 
85 |             # log statistics
86 |             logger.info("Computing logging information")
87 |             logger.logkv('SurrLoss', surr_loss.data)
88 |             log_action_distribution_statistics(dists)
89 |             log_reward_statistics(env)
90 |             log_baseline_statistics(trajs)
91 |             logger.dumpkvs()
92 | 
93 |             if snapshot_saver is not None:
94 |                 logger.info("Saving snapshot")
95 |                 snapshot_saver.save_state(
96 |                     iter,
97 |                     dict(
98 |                         alg=pg,
99 |                         alg_state=dict(
100 |                             env_maker=env_maker,
101 |                             policy=policy,
102 |                             baseline=baseline,
103 |                             n_envs=n_envs,
104 |                             last_iter=iter,
105 |                             n_iters=n_iters,
106 |                             batch_size=batch_size,
107 |                             optimizer=optimizer,
108 |                             discount=discount,
109 |                             gae_lambda=gae_lambda
110 |                         )
111 |                     )
112 |                 )
113 | 
--------------------------------------------------------------------------------
/labs/lab5/scripts/generate_key_pairs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from cloudexec import get_cloudexec_config, get_project_root
3 | import boto3
4 | import botocore.exceptions
5 | import os
6 | 
7 | if __name__ == "__main__":
8 |     config = get_cloudexec_config()
9 | 
10 |     key_names = dict()
11 | 
12 |     for region in config.aws_regions:
13 |         ec2_client = boto3.client(
14 |             "ec2",
15 |             region_name=region,
16 |             aws_access_key_id=config.aws_access_key,
17 |             aws_secret_access_key=config.aws_access_secret,
18 |         )
19 | 
20 |         key_name = "{attendee_id}_{region}".format(
21 |             attendee_id=config.attendee_id, region=region)
22 | 
23 |         key_names[region] = key_name
24 | 
25 |         print("Trying to create key pair with name %s" % key_name)
26 |         import cloudexec
27 |         file_name = cloudexec.local_ec2_key_pair_path(key_name)
28 | 
29 |         try:
30 |             key_pair = ec2_client.create_key_pair(KeyName=key_name)
31 |         except botocore.exceptions.ClientError as e:
32 |             if e.response['Error']['Code'] == 'InvalidKeyPair.Duplicate':
33 |                 if os.path.exists(file_name):
34 |                     print("Key pair with name {key_name} already exists.".format(
35 |                         key_name=key_name))
36 |                 else:
37 |                     print(
38 |                         "Key pair with name {key_name} exists remotely, but not locally!
To fix this, " 39 | "delete the remote one first".format(key_name=key_name)) 40 | continue 41 | else: 42 | raise e 43 | 44 | print("Saving key pair file") 45 | os.makedirs(os.path.dirname(file_name), exist_ok=True) 46 | with os.fdopen(os.open(file_name, os.O_WRONLY | os.O_CREAT, 0o600), 'w') as handle: 47 | handle.write(key_pair['KeyMaterial'] + '\n') 48 | 49 | print("All set!") 50 | print("Now, edit your cloudexec.yml file, and update the `aws_key_pairs` entry to the following:") 51 | 52 | print() 53 | print("aws_key_pairs:") 54 | for region in config.aws_regions: 55 | print(" - {region}: {key_name}".format(region=region, 56 | key_name=key_names[region])) 57 | -------------------------------------------------------------------------------- /labs/lab5/scripts/resume_training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import click 4 | import logger 5 | 6 | 7 | @click.command() 8 | @click.argument("dir") # , "Directory which contains snapshot files") 9 | @click.option("--interval", help="Interval between saving snapshots", type=int, default=10) 10 | def main(dir, interval): 11 | with logger.session(dir): 12 | saver = SnapshotSaver(dir, interval=interval) 13 | state = saver.get_state() 14 | alg_state = state['alg_state'] 15 | env = alg_state['env_maker'].make() 16 | alg = state['alg'] 17 | alg(env=env, snapshot_saver=saver, **alg_state) 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | -------------------------------------------------------------------------------- /labs/lab5/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 
43 | -------------------------------------------------------------------------------- /labs/lab5/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import click 4 | import time 5 | import os 6 | 7 | 8 | @click.command() 9 | @click.argument("dir") 10 | def main(dir): 11 | env = None 12 | while True: 13 | saver = SnapshotSaver(dir) 14 | state = saver.get_state() 15 | if state is None: 16 | time.sleep(1) 17 | continue 18 | alg_state = state['alg_state'] 19 | if env is None: 20 | env = alg_state['env_maker'].make() 21 | policy = alg_state['policy'] 22 | ob = env.reset() 23 | done = False 24 | while not done: 25 | action, _ = policy.get_action(ob) 26 | ob, _, done, _ = env.step(action) 27 | env.render() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /labs/lab5/scripts/sync_s3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cloudexec 4 | import os 5 | import argparse 6 | import subprocess 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('folder', type=str) 11 | parser.add_argument('--all', action='store_true', default=False) 12 | args = parser.parse_args() 13 | remote_dir = "s3://{bucket}/{bucket_root}/experiments".format( 14 | bucket=cloudexec.get_cloudexec_config().s3_bucket, 15 | bucket_root=cloudexec.get_cloudexec_config().s3_bucket_root 16 | ) 17 | local_dir = os.path.join(cloudexec.get_project_root(), "data", "s3") 18 | if args.folder: 19 | remote_dir = os.path.join(remote_dir, args.folder) 20 | local_dir = os.path.join(local_dir, args.folder) 21 | s3_env = dict( 22 | os.environ, 23 | AWS_ACCESS_KEY_ID=cloudexec.get_cloudexec_config().aws_access_key, 24 | AWS_SECRET_ACCESS_KEY=cloudexec.get_cloudexec_config().aws_access_secret, 25 | AWS_REGION=cloudexec.get_cloudexec_config().aws_s3_region, 26 | ) 27 | if not args.all: 28 | command = (""" 29 | aws s3 sync --exclude '*' {s3_periodic_sync_include_flags} --content-type "UTF-8" {remote_dir} {local_dir} 30 | """.format(local_dir=local_dir, remote_dir=remote_dir, 31 | s3_periodic_sync_include_flags=cloudexec.get_cloudexec_config().s3_periodic_sync_include_flags)) 32 | else: 33 | command = (""" 34 | aws s3 sync --content-type "UTF-8" {remote_dir} {local_dir} 35 | """.format(local_dir=local_dir, remote_dir=remote_dir)) 36 | subprocess.check_call(command, shell=True, env=s3_env) 37 | -------------------------------------------------------------------------------- /labs/lab5/scripts/test_ec2_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import cloudexec 6 | import boto3 7 | import botocore.exceptions 8 | import os 9 | import subprocess 10 | config = cloudexec.get_cloudexec_config() 11 | 12 | assert len({ 13 | config.attendee_id, 14 | config.ec2_instance_label, 15 | config.s3_bucket_root 16 | }) == 1, "attendee_id, ec2_instance_label, s3_bucket_root should have the same value" 17 | 18 | print("Testing attendee_id, aws_access_key, and aws_access_secret...") 19 | 20 | iam_client = boto3.client( 21 | "iam", 22 | region_name=config.aws_regions[0], 23 | aws_access_key_id=config.aws_access_key, 24 | aws_secret_access_key=config.aws_access_secret, 25 | ) 26 | try: 27 | iam_client.list_access_keys(UserName=config.attendee_id) 
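    # NOTE: list_access_keys serves purely as a cheap authenticated probe here;
    # the ClientError code it fails with (handled in the branches below) tells
    # us which of the three credentials is misconfigured.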
28 | except botocore.exceptions.ClientError as e: 29 | if e.response['Error']['Code'] == 'InvalidClientTokenId': 30 | print("aws_access_key is not set properly!") 31 | exit() 32 | elif e.response['Error']['Code'] == 'SignatureDoesNotMatch': 33 | print("aws_access_secret is not set properly!") 34 | exit() 35 | elif e.response['Error']['Code'] == 'AccessDenied': 36 | print("attendee_id is not set properly!") 37 | exit() 38 | else: 39 | raise e 40 | 41 | # Check if key pair exists 42 | 43 | for region in config.aws_regions: 44 | print("Checking key pair in region %s" % region) 45 | if region not in config.aws_key_pairs: 46 | print("Key pair in region %s is not set properly!" % region) 47 | exit() 48 | key_pair_name = config.aws_key_pairs[region] 49 | key_pair_path = cloudexec.local_ec2_key_pair_path(key_pair_name) 50 | if not os.path.exists(key_pair_path): 51 | print("Missing local key pair file at %s" % key_pair_path) 52 | exit() 53 | ec2_client = boto3.client( 54 | "ec2", 55 | region_name=region, 56 | aws_access_key_id=config.aws_access_key, 57 | aws_secret_access_key=config.aws_access_secret, 58 | ) 59 | try: 60 | response = ec2_client.describe_key_pairs( 61 | KeyNames=[config.aws_key_pairs[region]] 62 | ) 63 | except botocore.exceptions.ClientError as e: 64 | if e.response['Error']['Code'] == 'InvalidKeyPair.NotFound': 65 | print("Key pair in region %s is not set properly!" % region) 66 | exit() 67 | else: 68 | raise e 69 | remote_fingerprint = response['KeyPairs'][0]['KeyFingerprint'] 70 | 71 | # Get local key fingerprint 72 | 73 | ps = subprocess.Popen( 74 | ["openssl", "pkcs8", "-in", key_pair_path, 75 | "-nocrypt", "-topk8", "-outform", "DER"], 76 | stdout=subprocess.PIPE 77 | ) 78 | local_fingerprint = subprocess.check_output( 79 | ["openssl", "sha1", "-c"], stdin=ps.stdout) 80 | # Strip irrelevant information 81 | local_fingerprint = local_fingerprint.decode().split('= ')[-1][:-1] 82 | 83 | if remote_fingerprint != local_fingerprint: 84 | print("Local key pair file does not match EC2 record!") 85 | exit() 86 | 87 | print("Your EC2 configuration has passed all checks!") 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /labs/lab5/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import roboschool 6 | import gym 7 | import chainer 8 | env = gym.make('CartPole-v0') 9 | env.reset() 10 | env.step(env.action_space.sample()) 11 | env = gym.make('RoboschoolHalfCheetah-v1') 12 | env.reset() 13 | env.step(env.action_space.sample()) 14 | print("Your environment has been successfully set up!") 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /labs/lab5/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | from gym import Env 2 | from gym.envs.registration import register 3 | from gym.utils import seeding 4 | from gym import spaces 5 | from gym.envs.classic_control.cartpole import CartPoleEnv 6 | import numpy as np 7 | 8 | 9 | class PointEnv(Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 50 13 | } 14 | 15 | def __init__(self): 16 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 17 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 18 | 19 | self._seed() 20 | 
self.viewer = None 21 | self.state = None 22 | 23 | def _seed(self, seed=None): 24 | self.np_random, seed = seeding.np_random(seed) 25 | return [seed] 26 | 27 | def _step(self, action): 28 | action = np.clip(action, -0.025, 0.025) 29 | self.state = np.clip(self.state + action, -1, 1) 30 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 31 | 32 | def _reset(self): 33 | while True: 34 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 35 | # Sample states that are far away 36 | if np.linalg.norm(self.state) > 0.9: 37 | break 38 | return np.array(self.state) 39 | 40 | # def _render(self, mode='human', close=False): 41 | # pass 42 | 43 | def _render(self, mode='human', close=False): 44 | if close: 45 | if self.viewer is not None: 46 | self.viewer.close() 47 | self.viewer = None 48 | return 49 | 50 | screen_width = 800 51 | screen_height = 800 52 | 53 | if self.viewer is None: 54 | from gym.envs.classic_control import rendering 55 | self.viewer = rendering.Viewer(screen_width, screen_height) 56 | 57 | agent = rendering.make_circle( 58 | min(screen_height, screen_width) * 0.03) 59 | origin = rendering.make_circle( 60 | min(screen_height, screen_width) * 0.03) 61 | trans = rendering.Transform(translation=(0, 0)) 62 | agent.add_attr(trans) 63 | self.trans = trans 64 | agent.set_color(1, 0, 0) 65 | origin.set_color(0, 0, 0) 66 | origin.add_attr(rendering.Transform( 67 | translation=(screen_width // 2, screen_height // 2))) 68 | self.viewer.add_geom(agent) 69 | self.viewer.add_geom(origin) 70 | 71 | # self.trans.set_translation(0, 0) 72 | self.trans.set_translation( 73 | (self.state[0] + 1) / 2 * screen_width, 74 | (self.state[1] + 1) / 2 * screen_height, 75 | ) 76 | 77 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 78 | 79 | 80 | register( 81 | 'Point-v0', 82 | entry_point='simplepg.point_env:PointEnv', 83 | timestep_limit=40, 84 | ) 85 | -------------------------------------------------------------------------------- /labs/lab5/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import click 3 | import numpy as np 4 | import gym 5 | 6 | from simplepg.simple_utils import include_bias, weighted_sample 7 | 8 | 9 | def point_get_action(theta, ob, rng=np.random): 10 | ob_1 = include_bias(ob) 11 | mean = theta.dot(ob_1) 12 | return rng.normal(loc=mean, scale=1.) 
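# For intuition, a small worked example (hypothetical numbers): with obs_dim = 2,
# theta has shape (2, 3) because include_bias appends a constant 1.0 feature, so
# the mean action is an affine function of the observation.
#
#     theta = np.zeros((2, 3))
#     ob = np.array([0.5, -0.5])
#     point_get_action(theta, ob, rng=np.random.RandomState(0))
#     # draws from N(mean=[0, 0], std=1), e.g. array([1.7641, 0.4002])
#
# cartpole_get_action below instead treats theta.dot(ob_1) as logits over the
# discrete actions.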
13 | 14 | 15 | def cartpole_get_action(theta, ob, rng=np.random): 16 | ob_1 = include_bias(ob) 17 | logits = ob_1.dot(theta.T) 18 | return weighted_sample(logits, rng=rng) 19 | 20 | 21 | @click.command() 22 | @click.argument("env_id", type=str, default="Point-v0") 23 | def main(env_id): 24 | # Register the environment 25 | rng = np.random.RandomState(42) 26 | 27 | if env_id == 'CartPole-v0': 28 | env = gym.make('CartPole-v0') 29 | get_action = cartpole_get_action 30 | obs_dim = env.observation_space.shape[0] 31 | action_dim = env.action_space.n 32 | elif env_id == 'Point-v0': 33 | from simplepg import point_env 34 | env = gym.make('Point-v0') 35 | get_action = point_get_action 36 | obs_dim = env.observation_space.shape[0] 37 | action_dim = env.action_space.shape[0] 38 | else: 39 | raise ValueError( 40 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 41 | 42 | env.seed(42) 43 | 44 | # Initialize parameters 45 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 46 | 47 | while True: 48 | ob = env.reset() 49 | done = False 50 | # Only render the first trajectory 51 | # Collect a new trajectory 52 | rewards = [] 53 | while not done: 54 | action = get_action(theta, ob, rng=rng) 55 | next_ob, rew, done, _ = env.step(action) 56 | ob = next_ob 57 | env.render() 58 | rewards.append(rew) 59 | 60 | print("Episode reward: %.2f" % np.sum(rewards)) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /labs/lab5/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.special 3 | import chainer 4 | 5 | 6 | # Compute gradient approximately using finite difference 7 | def numerical_grad(f, x, eps=1e-8): 8 | grad = np.zeros_like(x) 9 | for i in range(len(x)): 10 | xplus = np.array(x) 11 | xplus[i] += eps 12 | fplus = f(xplus) 13 | xminus = np.array(x) 14 | xminus[i] -= eps 15 | fminus = f(xminus) 16 | grad[i] = (fplus - fminus) / (2 * eps) 17 | return grad 18 | 19 | 20 | def gradient_check(f, g, x): 21 | # Test the implementation of g(x) = df/dx 22 | # Perform numerical differentiation and test it 23 | g_num = numerical_grad(f, x) 24 | g_test = g(x) 25 | try: 26 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 27 | print("Gradient check passed!") 28 | except AssertionError as e: 29 | print(e) 30 | print("Error: Gradient check didn't pass!") 31 | exit() 32 | 33 | 34 | def log_softmax(logits): 35 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 36 | 37 | 38 | def softmax(logits): 39 | x = logits 40 | x = x - np.max(x, axis=-1, keepdims=True) 41 | x = np.exp(x) 42 | return x / np.sum(x, axis=-1, keepdims=True) 43 | 44 | 45 | def weighted_sample(logits, rng=np.random): 46 | weights = softmax(logits) 47 | return min( 48 | int(np.sum(rng.uniform() > np.cumsum(weights))), 49 | len(weights) - 1 50 | ) 51 | 52 | 53 | def include_bias(x): 54 | # Add a constant term (1.0) to each entry in x 55 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 56 | 57 | 58 | _tested = set() 59 | _tests = dict() 60 | 61 | nprs = np.random.RandomState 62 | 63 | 64 | def register_test(fn_name, kwargs, desired_output=None): 65 | assert fn_name not in _tests 66 | _tests[fn_name] = (kwargs, desired_output) 67 | 68 | 69 | def assert_allclose(a, b): 70 | if isinstance(a, (np.ndarray, float, int)): 71 | np.testing.assert_allclose(a, b, rtol=1e-5) 72 | elif isinstance(a, (tuple, list)): 73 | 
assert isinstance(b, (tuple, list)) 74 | assert len(a) == len(b) 75 | for a_i, b_i in zip(a, b): 76 | assert_allclose(a_i, b_i) 77 | elif isinstance(a, chainer.Variable): 78 | assert isinstance(b, chainer.Variable) 79 | assert_allclose(a.data, b.data) 80 | else: 81 | raise NotImplementedError 82 | 83 | 84 | def test_once(fn): 85 | module = fn.__module__ 86 | name = fn.__name__ 87 | key = module + "." + name 88 | if key in _tested: 89 | return 90 | assert key in _tests, "Test for %s not found!" % key 91 | kwargs, desired_output = _tests[key] 92 | _tested.add(key) 93 | 94 | if callable(kwargs): 95 | kwargs = kwargs() 96 | 97 | if callable(desired_output): 98 | desired_output = desired_output() 99 | 100 | if desired_output is None: 101 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 102 | exit() 103 | else: 104 | try: 105 | output = fn(**kwargs) 106 | assert_allclose(desired_output, output) 107 | print("Test for %s passed!" % key) 108 | except AssertionError as e: 109 | print(e) 110 | print("Error: test for %s didn't pass!" % key) 111 | exit() 112 | -------------------------------------------------------------------------------- /labs/lab5/tests/a2c_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | from chainer import Variable 4 | 5 | register_test( 6 | "a2c.compute_returns_advantages", 7 | kwargs=lambda: dict( 8 | rewards=nprs(0).uniform(size=(5, 2)), 9 | dones=nprs(1).choice([True, False], size=(5, 2)), 10 | values=nprs(2).uniform(size=(5, 2)), 11 | next_values=nprs(3).uniform(size=(2,)), 12 | discount=0.99, 13 | ), 14 | desired_output=lambda: ( 15 | np.array([[1.14554925, 1.25462372], 16 | [0.60276338, 0.54488318], 17 | [2.33579066, 1.90456042], 18 | [1.93145037, 1.2713801], 19 | [1.50895268, 0.38344152]]), 20 | np.array([[0.70955434, 1.22869749], 21 | [0.0531009, 0.10956079], 22 | [1.91542286, 1.5742256], 23 | [1.72680173, 0.65210914], 24 | [1.20929801, 0.11661424]]) 25 | ) 26 | ) 27 | 28 | register_test( 29 | "a2c.compute_total_loss", 30 | kwargs=lambda: dict( 31 | logli=Variable(nprs(0).uniform(size=(10,)).astype(np.float32)), 32 | all_advs=Variable(nprs(1).uniform(size=(10,)).astype(np.float32)), 33 | ent_coeff=nprs(2).uniform(), 34 | ent=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 35 | vf_loss_coeff=nprs(4).uniform(), 36 | all_returns=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 37 | all_values=Variable(nprs(6).uniform(size=(10,)).astype(np.float32)), 38 | ), 39 | desired_output=lambda: ( 40 | Variable(np.array(-0.4047563076019287, dtype=np.float32)), 41 | Variable(np.array(0.22883716225624084, dtype=np.float32)), 42 | Variable(np.array(-0.1834639459848404, dtype=np.float32)) 43 | ) 44 | ) 45 | -------------------------------------------------------------------------------- /labs/lab5/tests/pg_tests.py: -------------------------------------------------------------------------------- 1 | from chainer import Variable 2 | 3 | from simplepg.simple_utils import register_test, nprs 4 | from utils import Gaussian 5 | import numpy as np 6 | 7 | register_test( 8 | "pg.compute_surr_loss", 9 | kwargs=lambda: dict( 10 | dists=Gaussian( 11 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 12 | log_stds=Variable(nprs(1).uniform( 13 | size=(10, 3)).astype(np.float32)), 14 | ), 15 | all_acts=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 16 | all_advs=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 
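        # nprs(k) is np.random.RandomState(k) (see simplepg.simple_utils), so
        # the inputs above are fully deterministic and pg.compute_surr_loss
        # must reproduce the scalar recorded in desired_output below.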
17 | ), 18 | desired_output=lambda: Variable( 19 | np.array(1.9201269149780273, dtype=np.float32)) 20 | ) 21 | -------------------------------------------------------------------------------- /labs/lab5/tests/simplepg_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | 4 | register_test( 5 | "__main__.compute_update", 6 | kwargs=lambda: dict( 7 | discount=0.99, 8 | R_tplus1=1.0, 9 | theta=nprs(0).uniform(size=(2, 2)), 10 | s_t=nprs(1).uniform(size=(1,)), 11 | a_t=nprs(2).choice(2), 12 | r_t=nprs(3).uniform(), 13 | b_t=nprs(4).uniform(), 14 | get_grad_logp_action=lambda theta, *_: theta * 2 15 | ), 16 | desired_output=lambda: ( 17 | 1.5407979025745755, 18 | np.array([[0.62978332, 0.82070564], [0.69169275, 0.62527314]]) 19 | ) 20 | ) 21 | 22 | register_test( 23 | "__main__.compute_baselines", 24 | kwargs=lambda: dict( 25 | all_returns=[ 26 | nprs(0).uniform(size=(10,)), 27 | nprs(1).uniform(size=(20,)), 28 | [], 29 | ], 30 | ), 31 | desired_output=lambda: np.array([0.61576628, 0.36728075, 0.]) 32 | ) 33 | 34 | register_test( 35 | "__main__.compute_fisher_matrix", 36 | kwargs=lambda: dict( 37 | theta=nprs(1).uniform(size=(2, 2)), 38 | get_grad_logp_action=lambda theta, ob, action: np.exp( 39 | theta) * np.linalg.norm(action), 40 | all_observations=list(nprs(2).uniform(size=(5, 1))), 41 | all_actions=list(nprs(3).choice(2, size=(5,))), 42 | ), 43 | desired_output=lambda: np.array([[0.92104469, 1.24739299, 0.60704379, 0.82124306], 44 | [1.24739299, 1.68937435, 45 | 0.82213401, 1.11222925], 46 | [0.60704379, 0.82213401, 47 | 0.40009151, 0.54126635], 48 | [0.82124306, 1.11222925, 0.54126635, 0.73225564]]) 49 | ) 50 | 51 | register_test( 52 | "__main__.compute_natural_gradient", 53 | kwargs=lambda: dict( 54 | F=nprs(0).uniform(size=(4, 4)), 55 | grad=nprs(1).uniform(size=(2, 2)), 56 | reg=1e-3, 57 | ), 58 | desired_output=lambda: np.array( 59 | [[-0.44691565, 0.5477328], [-0.20366472, 0.72267091]]) 60 | ) 61 | 62 | register_test( 63 | "__main__.compute_step_size", 64 | kwargs=lambda: dict( 65 | F=nprs(0).uniform(size=(2, 2)), 66 | natural_grad=nprs(1).uniform(size=(1, 2)), 67 | natural_step_size=1e-2, 68 | ), 69 | desired_output=lambda: 0.1607407366467048, 70 | ) 71 | -------------------------------------------------------------------------------- /labs/lab5/tests/trpo_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | from chainer import Variable 4 | 5 | from utils import Gaussian 6 | 7 | register_test( 8 | "trpo.compute_surr_loss", 9 | kwargs=lambda: dict( 10 | old_dists=Gaussian( 11 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 12 | log_stds=Variable(nprs(1).uniform( 13 | size=(10, 3)).astype(np.float32)), 14 | ), 15 | new_dists=Gaussian( 16 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 17 | log_stds=Variable(nprs(3).uniform( 18 | size=(10, 3)).astype(np.float32)), 19 | ), 20 | all_acts=Variable(nprs(4).uniform(size=(10, 3)).astype(np.float32)), 21 | all_advs=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 22 | ), 23 | desired_output=lambda: Variable( 24 | np.array(-0.5629823207855225, dtype=np.float32)) 25 | ) 26 | 27 | register_test( 28 | "trpo.compute_kl", 29 | kwargs=lambda: dict( 30 | old_dists=Gaussian( 31 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 32 | 
log_stds=Variable(nprs(1).uniform( 33 | size=(10, 3)).astype(np.float32)), 34 | ), 35 | new_dists=Gaussian( 36 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 37 | log_stds=Variable(nprs(3).uniform( 38 | size=(10, 3)).astype(np.float32)), 39 | ), 40 | ), 41 | desired_output=lambda: Variable( 42 | np.array(0.5306503176689148, dtype=np.float32)) 43 | ) 44 | -------------------------------------------------------------------------------- /labs/lab5/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab5/viskit/__init__.py -------------------------------------------------------------------------------- /labs/lab5/viskit/core.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import json 4 | import os 5 | 6 | import numpy as np 7 | 8 | 9 | # from sandbox.rocky.utils.py_utils import AttrDict 10 | 11 | class AttrDict(dict): 12 | def __init__(self, *args, **kwargs): 13 | super(AttrDict, self).__init__(*args, **kwargs) 14 | self.__dict__ = self 15 | 16 | 17 | def unique(l): 18 | return list(set(l)) 19 | 20 | 21 | def flatten(l): 22 | return [item for sublist in l for item in sublist] 23 | 24 | 25 | def load_progress(progress_json_path, verbose=True): 26 | if verbose: 27 | print("Reading %s" % progress_json_path) 28 | entries = dict() 29 | rows = [] 30 | with open(progress_json_path, 'r') as f: 31 | lines = f.read().split('\n') 32 | for line in lines: 33 | if len(line) > 0: 34 | row = json.loads(line) 35 | rows.append(row) 36 | all_keys = set(k for row in rows for k in row.keys()) 37 | for k in all_keys: 38 | if k not in entries: 39 | entries[k] = [] 40 | for row in rows: 41 | if k in row: 42 | v = row[k] 43 | try: 44 | entries[k].append(float(v)) 45 | except: 46 | entries[k].append(np.nan) 47 | else: 48 | entries[k].append(np.nan) 49 | 50 | # entries[key] = [row.get(key, np.nan) for row in rows] 51 | # added_keys = set() 52 | # for k, v in row.items(): 53 | # if k not in entries: 54 | # entries[k] = [] 55 | # try: 56 | # entries[k].append(float(v)) 57 | # except: 58 | # entries[k].append(0.) 59 | # added_keys.add(k) 60 | # for k in entries.keys(): 61 | # if k not in added_keys: 62 | # entries[k].append(np.nan) 63 | entries = dict([(k, np.array(v)) for k, v in entries.items()]) 64 | return entries 65 | 66 | 67 | def flatten_dict(d): 68 | flat_params = dict() 69 | for k, v in d.items(): 70 | if isinstance(v, dict): 71 | v = flatten_dict(v) 72 | for subk, subv in flatten_dict(v).items(): 73 | flat_params[k + "." 
+ subk] = subv 74 | else: 75 | flat_params[k] = v 76 | return flat_params 77 | 78 | 79 | def load_params(params_json_path): 80 | with open(params_json_path, 'r') as f: 81 | data = json.loads(f.read()) 82 | if "args_data" in data: 83 | del data["args_data"] 84 | if "exp_name" not in data: 85 | data["exp_name"] = params_json_path.split("/")[-2] 86 | return data 87 | 88 | 89 | def lookup(d, keys): 90 | if not isinstance(keys, list): 91 | keys = keys.split(".") 92 | for k in keys: 93 | if hasattr(d, "__getitem__"): 94 | if k in d: 95 | d = d[k] 96 | else: 97 | return None 98 | else: 99 | return None 100 | return d 101 | 102 | 103 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True): 104 | if isinstance(exp_folder_paths, str): 105 | exp_folder_paths = [exp_folder_paths] 106 | exps = [] 107 | for exp_folder_path in exp_folder_paths: 108 | exps += [x[0] for x in os.walk(exp_folder_path)] 109 | if verbose: 110 | print("finished walking exp folders") 111 | exps_data = [] 112 | for exp in exps: 113 | try: 114 | exp_path = exp 115 | variant_json_path = os.path.join(exp_path, "variant.json") 116 | progress_json_path = os.path.join(exp_path, "progress.json") 117 | progress = load_progress(progress_json_path, verbose=verbose) 118 | try: 119 | params = load_params(variant_json_path) 120 | except IOError: 121 | params = dict(exp_name="experiment") 122 | exps_data.append(AttrDict( 123 | progress=progress, params=params, flat_params=flatten_dict(params))) 124 | except IOError as e: 125 | if verbose: 126 | print(e) 127 | 128 | # a dictionary of all keys and types of values 129 | all_keys = dict() 130 | for data in exps_data: 131 | for key in data.flat_params.keys(): 132 | if key not in all_keys: 133 | all_keys[key] = type(data.flat_params[key]) 134 | 135 | # if any data does not have some key, specify the value of it 136 | if not ignore_missing_keys: 137 | default_values = dict() 138 | for data in exps_data: 139 | for key in sorted(all_keys.keys()): 140 | if key not in data.flat_params: 141 | if key not in default_values: 142 | default = None 143 | default_values[key] = default 144 | data.flat_params[key] = default_values[key] 145 | 146 | return exps_data 147 | 148 | 149 | def smart_repr(x): 150 | if isinstance(x, tuple): 151 | if len(x) == 0: 152 | return "tuple()" 153 | elif len(x) == 1: 154 | return "(%s,)" % smart_repr(x[0]) 155 | else: 156 | return "(" + ",".join(map(smart_repr, x)) + ")" 157 | else: 158 | if hasattr(x, "__call__"): 159 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." 
+ x.__name__) 160 | else: 161 | return repr(x) 162 | 163 | 164 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 165 | try: 166 | stringified_pairs = sorted( 167 | map( 168 | eval, 169 | unique( 170 | flatten( 171 | [ 172 | list( 173 | map( 174 | smart_repr, 175 | list(d.flat_params.items()) 176 | ) 177 | ) 178 | for d in exps_data 179 | ] 180 | ) 181 | ) 182 | ), 183 | key=lambda x: ( 184 | tuple("" if it is None else str(it) for it in x), 185 | ) 186 | ) 187 | except Exception as e: 188 | print(e) 189 | import ipdb 190 | ipdb.set_trace() 191 | proposals = [(k, [x[1] for x in v]) 192 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 193 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 194 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 195 | return filtered 196 | 197 | 198 | class Selector(object): 199 | def __init__(self, exps_data, filters=None, custom_filters=None): 200 | self._exps_data = exps_data 201 | if filters is None: 202 | self._filters = tuple() 203 | else: 204 | self._filters = tuple(filters) 205 | if custom_filters is None: 206 | self._custom_filters = [] 207 | else: 208 | self._custom_filters = custom_filters 209 | 210 | def where(self, k, v): 211 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 212 | 213 | def custom_filter(self, filter): 214 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 215 | 216 | def _check_exp(self, exp): 217 | # or exp.flat_params.get(k, None) is None 218 | return all( 219 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 220 | k not in exp.flat_params)) for k, v in self._filters) 221 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 222 | 223 | def extract(self): 224 | return list(filter(self._check_exp, self._exps_data)) 225 | 226 | def iextract(self): 227 | return filter(self._check_exp, self._exps_data) 228 | 229 | 230 | # Taken from plot.ly 231 | color_defaults = [ 232 | '#1f77b4', # muted blue 233 | '#ff7f0e', # safety orange 234 | '#2ca02c', # cooked asparagus green 235 | '#d62728', # brick red 236 | '#9467bd', # muted purple 237 | '#8c564b', # chestnut brown 238 | '#e377c2', # raspberry yogurt pink 239 | '#7f7f7f', # middle gray 240 | '#bcbd22', # curry yellow-green 241 | '#17becf' # blue-teal 242 | ] 243 | 244 | 245 | def hex_to_rgb(hex, opacity=1.0): 246 | if hex[0] == '#': 247 | hex = hex[1:] 248 | assert (len(hex) == 6) 249 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 250 | -------------------------------------------------------------------------------- /labs/setup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/setup.pdf -------------------------------------------------------------------------------- /labs/setup/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! 
-f $xhost ]; then 16 | echo "xhost not found!" 17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /labs/setup/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | 6 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 7 | docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 8 | -v "$DIR":/root/code/bootcamp_pg \ 9 | -ti dementrock/deeprlbootcamp \ 10 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 11 | -------------------------------------------------------------------------------- /labs/setup/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | dependencies: 3 | - python==3.5.3 4 | - numpy==1.13.1 5 | - notebook==5.0.0 6 | - pip: 7 | - gym==0.9.2 8 | - chainer==2.0.1 9 | - matplotlib==2.0.2 10 | -------------------------------------------------------------------------------- /labs/setup/findport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Usage: findport.py 3000 100 4 | # 5 | from __future__ import print_function 6 | import socket 7 | from contextlib import closing 8 | import sys 9 | 10 | if len(sys.argv) != 3: 11 | print("Usage: {} <base> <increment>".format(sys.argv[0])) 12 | sys.exit(1) 13 | 14 | base = int(sys.argv[1]) 15 | increment = int(sys.argv[2]) 16 | 17 | 18 | def find_free_port(): 19 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 20 | for port in range(base, 65536, increment): 21 | try: 22 | s.bind(('', port)) 23 | return s.getsockname()[1] 24 | except socket.error: 25 | continue 26 | 27 | 28 | print(find_free_port()) 29 | -------------------------------------------------------------------------------- /labs/setup/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!"
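# At this point Xvfb is serving an off-screen X display on :99 and x11vnc
# exposes that display over VNC on port 5900 (mapped out of the container by
# docker_run_vnc.sh), protected by the password stored above. The command
# below therefore runs with DISPLAY pointed at the virtual screen, so calls
# like env.render() draw into the buffer and can be watched from a VNC
# client. Hypothetical invocation (arguments are illustrative):
#   ./docker_run_vnc.sh python simplepg/rollout.py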
21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /labs/setup/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 
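# The three `defaults write` calls above are what make XQuartz usable from
# the Docker containers in this repo: allowing TCP connections and enabling
# indirect GLX rendering let X clients running inside a container render on
# the host display. XQuartz only reads these preferences at startup, hence
# the advice to log out and back in.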
43 | -------------------------------------------------------------------------------- /labs/setup/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import roboschool 6 | import gym 7 | import chainer 8 | env = gym.make('CartPole-v0') 9 | env.reset() 10 | env.step(env.action_space.sample()) 11 | env = gym.make('RoboschoolHalfCheetah-v1') 12 | env.reset() 13 | env.step(env.action_space.sample()) 14 | print("Your environment has been successfully set up!") 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /labs/setup/simplepg/__pycache__/point_env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/setup/simplepg/__pycache__/point_env.cpython-35.pyc -------------------------------------------------------------------------------- /labs/setup/simplepg/__pycache__/simple_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/setup/simplepg/__pycache__/simple_utils.cpython-35.pyc -------------------------------------------------------------------------------- /labs/setup/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | from gym import Env 2 | from gym.envs.registration import register 3 | from gym.utils import seeding 4 | from gym import spaces 5 | from gym.envs.classic_control.cartpole import CartPoleEnv 6 | import numpy as np 7 | 8 | 9 | class PointEnv(Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 50 13 | } 14 | 15 | def __init__(self): 16 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 17 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 18 | 19 | self._seed() 20 | self.viewer = None 21 | self.state = None 22 | 23 | def _seed(self, seed=None): 24 | self.np_random, seed = seeding.np_random(seed) 25 | return [seed] 26 | 27 | def _step(self, action): 28 | action = np.clip(action, -0.025, 0.025) 29 | self.state = np.clip(self.state + action, -1, 1) 30 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 31 | 32 | def _reset(self): 33 | while True: 34 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 35 | # Sample states that are far away 36 | if np.linalg.norm(self.state) > 0.9: 37 | break 38 | return np.array(self.state) 39 | 40 | # def _render(self, mode='human', close=False): 41 | # pass 42 | 43 | def _render(self, mode='human', close=False): 44 | if close: 45 | if self.viewer is not None: 46 | self.viewer.close() 47 | self.viewer = None 48 | return 49 | 50 | screen_width = 800 51 | screen_height = 800 52 | 53 | if self.viewer is None: 54 | from gym.envs.classic_control import rendering 55 | self.viewer = rendering.Viewer(screen_width, screen_height) 56 | 57 | agent = rendering.make_circle( 58 | min(screen_height, screen_width) * 0.03) 59 | origin = rendering.make_circle( 60 | min(screen_height, screen_width) * 0.03) 61 | trans = rendering.Transform(translation=(0, 0)) 62 | agent.add_attr(trans) 63 | self.trans = trans 64 | agent.set_color(1, 0, 0) 65 | origin.set_color(0, 0, 0) 66 | origin.add_attr(rendering.Transform( 67 | 
translation=(screen_width // 2, screen_height // 2))) 68 | self.viewer.add_geom(agent) 69 | self.viewer.add_geom(origin) 70 | 71 | # self.trans.set_translation(0, 0) 72 | self.trans.set_translation( 73 | (self.state[0] + 1) / 2 * screen_width, 74 | (self.state[1] + 1) / 2 * screen_height, 75 | ) 76 | 77 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 78 | 79 | 80 | register( 81 | 'Point-v0', 82 | entry_point='simplepg.point_env:PointEnv', 83 | timestep_limit=40, 84 | ) 85 | -------------------------------------------------------------------------------- /labs/setup/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import click 3 | import numpy as np 4 | import gym 5 | 6 | from simplepg.simple_utils import include_bias, weighted_sample 7 | 8 | 9 | def point_get_action(theta, ob, rng=np.random): 10 | ob_1 = include_bias(ob) 11 | mean = theta.dot(ob_1) 12 | return rng.normal(loc=mean, scale=1.) 13 | 14 | 15 | def cartpole_get_action(theta, ob, rng=np.random): 16 | ob_1 = include_bias(ob) 17 | logits = ob_1.dot(theta.T) 18 | return weighted_sample(logits, rng=rng) 19 | 20 | 21 | @click.command() 22 | @click.argument("env_id", type=str, default="Point-v0") 23 | def main(env_id): 24 | # Register the environment 25 | rng = np.random.RandomState(42) 26 | 27 | if env_id == 'CartPole-v0': 28 | env = gym.make('CartPole-v0') 29 | get_action = cartpole_get_action 30 | obs_dim = env.observation_space.shape[0] 31 | action_dim = env.action_space.n 32 | elif env_id == 'Point-v0': 33 | from simplepg import point_env 34 | env = gym.make('Point-v0') 35 | get_action = point_get_action 36 | obs_dim = env.observation_space.shape[0] 37 | action_dim = env.action_space.shape[0] 38 | else: 39 | raise ValueError( 40 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 41 | 42 | env.seed(42) 43 | 44 | # Initialize parameters 45 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 46 | 47 | while True: 48 | ob = env.reset() 49 | done = False 50 | # Only render the first trajectory 51 | # Collect a new trajectory 52 | rewards = [] 53 | while not done: 54 | action = get_action(theta, ob, rng=rng) 55 | next_ob, rew, done, _ = env.step(action) 56 | ob = next_ob 57 | env.render() 58 | rewards.append(rew) 59 | 60 | print("Episode reward: %.2f" % np.sum(rewards)) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /labs/setup/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.special 3 | import chainer 4 | 5 | 6 | # Compute gradient approximately using finite difference 7 | def numerical_grad(f, x, eps=1e-8): 8 | grad = np.zeros_like(x) 9 | for i in range(len(x)): 10 | xplus = np.array(x) 11 | xplus[i] += eps 12 | fplus = f(xplus) 13 | xminus = np.array(x) 14 | xminus[i] -= eps 15 | fminus = f(xminus) 16 | grad[i] = (fplus - fminus) / (2 * eps) 17 | return grad 18 | 19 | 20 | def gradient_check(f, g, x): 21 | # Test the implementation of g(x) = df/dx 22 | # Perform numerical differentiation and test it 23 | g_num = numerical_grad(f, x) 24 | g_test = g(x) 25 | try: 26 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 27 | print("Gradient check passed!") 28 | except AssertionError as e: 29 | print(e) 30 | print("Error: Gradient check didn't pass!") 31 | exit() 32 | 33 | 34 | def log_softmax(logits): 35 | return 
logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 36 | 37 | 38 | def softmax(logits): 39 | x = logits 40 | x = x - np.max(x, axis=-1, keepdims=True) 41 | x = np.exp(x) 42 | return x / np.sum(x, axis=-1, keepdims=True) 43 | 44 | 45 | def weighted_sample(logits, rng=np.random): 46 | weights = softmax(logits) 47 | return min( 48 | int(np.sum(rng.uniform() > np.cumsum(weights))), 49 | len(weights) - 1 50 | ) 51 | 52 | 53 | def include_bias(x): 54 | # Add a constant term (1.0) to each entry in x 55 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 56 | 57 | 58 | _tested = set() 59 | _tests = dict() 60 | 61 | nprs = np.random.RandomState 62 | 63 | 64 | def register_test(fn_name, kwargs, desired_output=None): 65 | assert fn_name not in _tests 66 | _tests[fn_name] = (kwargs, desired_output) 67 | 68 | 69 | def assert_allclose(a, b): 70 | if isinstance(a, (np.ndarray, float, int)): 71 | np.testing.assert_allclose(a, b) 72 | elif isinstance(a, (tuple, list)): 73 | assert isinstance(b, (tuple, list)) 74 | assert len(a) == len(b) 75 | for a_i, b_i in zip(a, b): 76 | assert_allclose(a_i, b_i) 77 | elif isinstance(a, chainer.Variable): 78 | assert isinstance(b, chainer.Variable) 79 | assert_allclose(a.data, b.data) 80 | else: 81 | raise NotImplementedError 82 | 83 | 84 | def test_once(fn): 85 | module = fn.__module__ 86 | name = fn.__name__ 87 | key = module + "." + name 88 | if key in _tested: 89 | return 90 | assert key in _tests, "Test for %s not found!" % key 91 | kwargs, desired_output = _tests[key] 92 | _tested.add(key) 93 | 94 | if callable(kwargs): 95 | kwargs = kwargs() 96 | 97 | if callable(desired_output): 98 | desired_output = desired_output() 99 | 100 | if desired_output is None: 101 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 102 | exit() 103 | else: 104 | try: 105 | output = fn(**kwargs) 106 | assert_allclose(desired_output, output) 107 | print("Test for %s passed!" % key) 108 | except AssertionError as e: 109 | print(e) 110 | print("Error: test for %s didn't pass!" 
% key) 111 | exit() 112 | -------------------------------------------------------------------------------- /slides/FrontiersPieterAbbeelPeterChenRockyDuan.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/FrontiersPieterAbbeelPeterChenRockyDuan.pdf -------------------------------------------------------------------------------- /slides/Lec10aUtilities.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec10aUtilities.pdf -------------------------------------------------------------------------------- /slides/Lec10binverseRL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec10binverseRL.pdf -------------------------------------------------------------------------------- /slides/Lec1intromdpsexactmethods.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec1intromdpsexactmethods.pdf -------------------------------------------------------------------------------- /slides/Lec2samplingbasedapproximationsandfunctionfitting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec2samplingbasedapproximationsandfunctionfitting.pdf -------------------------------------------------------------------------------- /slides/Lec3DQN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec3DQN.pdf -------------------------------------------------------------------------------- /slides/Lec4apolicygradientsactorcritic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec4apolicygradientsactorcritic.pdf -------------------------------------------------------------------------------- /slides/Lec4b_Pong_from_Pixels.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec4b_Pong_from_Pixels.pdf -------------------------------------------------------------------------------- /slides/Lec5advancedpolicygradientmethods.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec5advancedpolicygradientmethods.pdf -------------------------------------------------------------------------------- /slides/Lec6nutsandboltsdeeprlresearch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec6nutsandboltsdeeprlresearch.pdf -------------------------------------------------------------------------------- /slides/Lec7deeprlbootcampsvgscg.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec7deeprlbootcampsvgscg.pdf -------------------------------------------------------------------------------- /slides/Lec8derivativefree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec8derivativefree.pdf -------------------------------------------------------------------------------- /slides/Lec9modelbaseddeeprl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec9modelbaseddeeprl.pdf -------------------------------------------------------------------------------- /slides/TAintros.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/TAintros.pdf --------------------------------------------------------------------------------