├── README.md ├── labs ├── lab1and2.md ├── lab1and2 │ ├── .ipynb_checkpoints │ │ ├── Lab 1 - Problem 1-checkpoint.ipynb │ │ └── Lab 1 - Problem 2-checkpoint.ipynb │ ├── Lab 1 - Problem 1.ipynb │ ├── Lab 1 - Problem 2.ipynb │ ├── Lab 1 - Problem 3.ipynb │ ├── Lab 2.ipynb │ ├── crawler_env.py │ ├── discrete_env.py │ ├── environment.yml │ ├── frozen_lake.py │ └── misc.py ├── lab3.pdf ├── lab3 │ ├── docker_run.sh │ ├── docker_run_vnc.sh │ ├── environment.yml │ ├── findport.sh │ ├── launch_bg_screen_buffer.sh │ ├── logger.py │ ├── scripts │ │ ├── setup_xquartz.sh │ │ └── test_environment_setup.py │ ├── simpledqn │ │ ├── __init__.py │ │ ├── gridworld_env.py │ │ ├── main.py │ │ ├── replay_buffer.py │ │ ├── replay_buffer_warm_start.pkl │ │ ├── simple_utils.py │ │ ├── weights_warm_start.pkl │ │ └── wrappers.py │ └── viskit │ │ ├── __init__.py │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html ├── lab4.pdf ├── lab4 │ ├── a2c.py │ ├── alg_utils.py │ ├── algs.py │ ├── docker_run.sh │ ├── docker_run_vnc.sh │ ├── env_makers.py │ ├── environment.yml │ ├── experiments │ │ ├── run_a2c_breakout.py │ │ ├── run_a2c_pong.py │ │ ├── run_a2c_pong_warm_start.py │ │ ├── run_pg_cartpole.py │ │ ├── run_trpo_cartpole.py │ │ ├── run_trpo_half_cheetah.py │ │ └── run_trpo_pendulum.py │ ├── findport.py │ ├── launch_bg_screen_buffer.sh │ ├── logger.py │ ├── models.py │ ├── pg.py │ ├── pong_warm_start.pkl │ ├── scripts │ │ ├── resume_training.py │ │ ├── setup_xquartz.sh │ │ ├── sim_policy.py │ │ └── test_environment_setup.py │ ├── simplepg │ │ ├── __pycache__ │ │ │ ├── point_env.cpython-35.pyc │ │ │ └── simple_utils.cpython-35.pyc │ │ ├── main.py │ │ ├── point_env.py │ │ ├── rollout.py │ │ └── simple_utils.py │ ├── tests │ │ ├── __pycache__ │ │ │ ├── a2c_tests.cpython-35.pyc │ │ │ ├── pg_tests.cpython-35.pyc │ │ │ ├── simplepg_tests.cpython-35.pyc │ │ │ └── trpo_tests.cpython-35.pyc │ │ ├── a2c_tests.py │ │ ├── pg_tests.py │ │ ├── simplepg_tests.py │ │ └── trpo_tests.py │ ├── trpo.py │ ├── utils.py │ └── viskit │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── core.cpython-35.pyc │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html ├── lab5.pdf ├── lab5 │ ├── a2c.py │ ├── alg_utils.py │ ├── algs.py │ ├── cloudexec.py │ ├── cloudexec.yml.template │ ├── docker_run.sh │ ├── docker_run_vnc.sh │ ├── env_makers.py │ ├── environment.yml │ ├── experiments │ │ ├── run_a2c_breakout.py │ │ ├── run_a2c_pong.py │ │ ├── run_cloud_trpo_cartpole.py │ │ ├── run_cloud_trpo_pendulum_baseline.py │ │ ├── run_pg_cartpole.py │ │ ├── run_trpo_cartpole.py │ │ ├── run_trpo_half_cheetah.py │ │ └── run_trpo_pendulum.py │ ├── findport.py │ ├── launch_bg_screen_buffer.sh │ ├── logger.py │ ├── models.py │ ├── pg.py │ ├── scripts │ │ ├── ec2ctl.py │ │ ├── generate_key_pairs.py │ │ ├── resume_training.py │ │ ├── setup_xquartz.sh │ │ ├── sim_policy.py │ │ ├── sync_s3.py │ │ ├── test_ec2_setup.py │ │ └── test_environment_setup.py │ ├── simplepg │ │ ├── main.py │ │ ├── 
point_env.py │ │ ├── rollout.py │ │ └── simple_utils.py │ ├── tests │ │ ├── a2c_tests.py │ │ ├── pg_tests.py │ │ ├── simplepg_tests.py │ │ └── trpo_tests.py │ ├── trpo.py │ ├── utils.py │ └── viskit │ │ ├── __init__.py │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html ├── setup.pdf └── setup │ ├── docker_run.sh │ ├── docker_run_vnc.sh │ ├── environment.yml │ ├── findport.py │ ├── launch_bg_screen_buffer.sh │ ├── scripts │ ├── setup_xquartz.sh │ └── test_environment_setup.py │ └── simplepg │ ├── __pycache__ │ ├── point_env.cpython-35.pyc │ └── simple_utils.cpython-35.pyc │ ├── point_env.py │ ├── rollout.py │ └── simple_utils.py └── slides ├── FrontiersPieterAbbeelPeterChenRockyDuan.pdf ├── Lec10aUtilities.pdf ├── Lec10binverseRL.pdf ├── Lec1intromdpsexactmethods.pdf ├── Lec2samplingbasedapproximationsandfunctionfitting.pdf ├── Lec3DQN.pdf ├── Lec4apolicygradientsactorcritic.pdf ├── Lec4b_Pong_from_Pixels.pdf ├── Lec5advancedpolicygradientmethods.pdf ├── Lec6nutsandboltsdeeprlresearch.pdf ├── Lec7deeprlbootcampsvgscg.pdf ├── Lec8derivativefree.pdf ├── Lec9modelbaseddeeprl.pdf └── TAintros.pdf /README.md: -------------------------------------------------------------------------------- 1 | # Deep Reinforcement Learning Bootcamp @ UCBerkeley 2017 2 | 3 | That was a wonderful experience! Awesome people and lots of learning. 4 | 5 | **Note:** Slides and videos are now officially available at the [bootcamp webpage](https://sites.google.com/view/deep-rl-bootcamp/lectures?authuser=0) 6 | 7 | ## Content 8 | 9 | * **labs:** Completed lab lessons + environment setup guide 10 | * **slides:** Slides used by professors for the lectures 11 | -------------------------------------------------------------------------------- /labs/lab1and2.md: -------------------------------------------------------------------------------- 1 | * Activate the conda environment by running 2 | ``` 3 | source activate deeprlbootcamp 4 | ``` 5 | * Launch IPython Notebook from this directory; this should open up a browser window where you can click to open Lab1 and Lab2 6 | ``` 7 | jupyter notebook 8 | ``` 9 | * After opening a lab file, click “File - Trust Notebook” 10 | * If you have never used IPython Notebook before, skim this quick tutorial here: http://cs231n.github.io/ipython-tutorial/ -------------------------------------------------------------------------------- /labs/lab1and2/discrete_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import Env, spaces 4 | from gym.utils import seeding 5 | 6 | def categorical_sample(prob_n, np_random): 7 | """ 8 | Sample from categorical distribution 9 | Each row specifies class probabilities 10 | """ 11 | prob_n = np.asarray(prob_n) 12 | csprob_n = np.cumsum(prob_n) 13 | return (csprob_n > np_random.rand()).argmax() 14 | 15 | 16 | class DiscreteEnv(Env): 17 | 18 | """ 19 | Has the following members 20 | - nS: number of states 21 | - nA: number of actions 22 | - P: transitions (*) 23 | - isd: initial state distribution (**) 24 | 25 | (*) dictionary dict of dicts of lists, where 26 | P[s][a] == [(probability, nextstate, reward, done), ...] 
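      For illustration (hypothetical values): a deterministic transition
      from state 0 under action 1 into state 3 with zero reward would be
      stored as P[0][1] == [(1.0, 3, 0.0, False)]; a stochastic transition
      spreads the probability mass over several such tuples in the list.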
27 | (**) list or array of length nS 28 | 29 | 30 | """ 31 | def __init__(self, nS, nA, P, isd): 32 | self.P = P 33 | self.isd = isd 34 | self.lastaction=None # for rendering 35 | self.nS = nS 36 | self.nA = nA 37 | 38 | self.action_space = spaces.Discrete(self.nA) 39 | self.observation_space = spaces.Discrete(self.nS) 40 | 41 | self._seed() 42 | self._reset() 43 | 44 | def _seed(self, seed=None): 45 | self.np_random, seed = seeding.np_random(seed) 46 | return [seed] 47 | 48 | def _reset(self): 49 | self.s = categorical_sample(self.isd, self.np_random) 50 | self.lastaction=None 51 | return self.s 52 | 53 | def _step(self, a): 54 | transitions = self.P[self.s][a] 55 | i = categorical_sample([t[0] for t in transitions], self.np_random) 56 | p, s, r, d= transitions[i] 57 | self.s = s 58 | self.lastaction=a 59 | return (s, r, d, {"prob" : p}) 60 | -------------------------------------------------------------------------------- /labs/lab1and2/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - notebook 11 | - pip: 12 | - gym==0.9.2 13 | - chainer==2.0.1 14 | - ipdb==0.10.3 15 | - tblib==1.3.2 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | - matplotlib 33 | -------------------------------------------------------------------------------- /labs/lab1and2/frozen_lake.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from six import StringIO, b 4 | 5 | from gym import utils 6 | import discrete_env 7 | 8 | LEFT = 0 9 | DOWN = 1 10 | RIGHT = 2 11 | UP = 3 12 | 13 | MAPS = { 14 | "4x4": [ 15 | "SFFF", 16 | "FHFH", 17 | "FFFH", 18 | "HFFG" 19 | ], 20 | "8x8": [ 21 | "SFFFFFFF", 22 | "FFFFFFFF", 23 | "FFFHFFFF", 24 | "FFFFFHFF", 25 | "FFFHFFFF", 26 | "FHHFFFHF", 27 | "FHFFHFHF", 28 | "FFFHFFFG" 29 | ], 30 | } 31 | 32 | class FrozenLakeEnv(discrete_env.DiscreteEnv): 33 | """ 34 | Winter is here. You and your friends were tossing around a frisbee at the park 35 | when you made a wild throw that left the frisbee out in the middle of the lake. 36 | The water is mostly frozen, but there are a few holes where the ice has melted. 37 | If you step into one of those holes, you'll fall into the freezing water. 38 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 39 | you navigate across the lake and retrieve the disc. 40 | However, the ice is slippery, so you won't always move in the direction you intend. 41 | The surface is described using a grid like the following 42 | 43 | SFFF 44 | FHFH 45 | FFFH 46 | HFFG 47 | 48 | S : starting point, safe 49 | F : frozen surface, safe 50 | H : hole, fall to your doom 51 | G : goal, where the frisbee is located 52 | 53 | The episode ends when you reach the goal or fall in a hole. 54 | You receive a reward of 1 if you reach the goal, and zero otherwise. 
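    To make the slippery dynamics concrete (this matches the transition
    construction in __init__ below): with is_slippery=True, each action moves
    in the intended direction with probability 0.8, and in each of the two
    perpendicular directions with probability 0.1.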
55 | 56 | """ 57 | 58 | metadata = {'render.modes': ['human', 'ansi']} 59 | 60 | def __init__(self, desc=None, map_name="4x4",is_slippery=True): 61 | if desc is None and map_name is None: 62 | raise ValueError('Must provide either desc or map_name') 63 | elif desc is None: 64 | desc = MAPS[map_name] 65 | self.desc = desc = np.asarray(desc,dtype='c') 66 | self.nrow, self.ncol = nrow, ncol = desc.shape 67 | 68 | nA = 4 69 | nS = nrow * ncol 70 | 71 | isd = np.array(desc == b'S').astype('float64').ravel() 72 | isd /= isd.sum() 73 | 74 | P = {s : {a : [] for a in range(nA)} for s in range(nS)} 75 | 76 | def to_s(row, col): 77 | return row*ncol + col 78 | def inc(row, col, a): 79 | if a==0: # left 80 | col = max(col-1,0) 81 | elif a==1: # down 82 | row = min(row+1,nrow-1) 83 | elif a==2: # right 84 | col = min(col+1,ncol-1) 85 | elif a==3: # up 86 | row = max(row-1,0) 87 | return (row, col) 88 | 89 | for row in range(nrow): 90 | for col in range(ncol): 91 | s = to_s(row, col) 92 | for a in range(4): 93 | li = P[s][a] 94 | letter = desc[row, col] 95 | if letter in b'GH': 96 | li.append((1.0, s, 0, True)) 97 | else: 98 | if is_slippery: 99 | for b in [(a-1)%4, a, (a+1)%4]: 100 | newrow, newcol = inc(row, col, b) 101 | newstate = to_s(newrow, newcol) 102 | newletter = desc[newrow, newcol] 103 | done = bytes(newletter) in b'GH' 104 | rew = float(newletter == b'G') 105 | li.append((0.8 if b==a else 0.1, newstate, rew, done)) 106 | else: 107 | newrow, newcol = inc(row, col, a) 108 | newstate = to_s(newrow, newcol) 109 | newletter = desc[newrow, newcol] 110 | done = bytes(newletter) in b'GH' 111 | rew = float(newletter == b'G') 112 | li.append((1.0, newstate, rew, done)) 113 | 114 | super(FrozenLakeEnv, self).__init__(nS, nA, P, isd) 115 | 116 | def _render(self, mode='human', close=False): 117 | if close: 118 | return 119 | outfile = StringIO() if mode == 'ansi' else sys.stdout 120 | 121 | row, col = self.s // self.ncol, self.s % self.ncol 122 | desc = self.desc.tolist() 123 | desc = [[c.decode('utf-8') for c in line] for line in desc] 124 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 125 | if self.lastaction is not None: 126 | outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction])) 127 | else: 128 | outfile.write("\n") 129 | outfile.write("\n".join(''.join(line) for line in desc)+"\n") 130 | 131 | return outfile 132 | -------------------------------------------------------------------------------- /labs/lab1and2/misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from six import StringIO, b 4 | 5 | from gym import utils 6 | import discrete_env 7 | 8 | LEFT = 0 9 | DOWN = 1 10 | RIGHT = 2 11 | UP = 3 12 | 13 | MAPS = { 14 | "4x4": [ 15 | "SFFF", 16 | "FHFH", 17 | "FFFH", 18 | "HFFG" 19 | ], 20 | "8x8": [ 21 | "SFFFFFFF", 22 | "FFFFFFFF", 23 | "FFFHFFFF", 24 | "FFFFFHFF", 25 | "FFFHFFFF", 26 | "FHHFFFHF", 27 | "FHFFHFHF", 28 | "FFFHFFFG" 29 | ], 30 | } 31 | 32 | class FrozenLakeEnv(discrete_env.DiscreteEnv): 33 | """ 34 | Winter is here. You and your friends were tossing around a frisbee at the park 35 | when you made a wild throw that left the frisbee out in the middle of the lake. 36 | The water is mostly frozen, but there are a few holes where the ice has melted. 37 | If you step into one of those holes, you'll fall into the freezing water. 
38 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 39 | you navigate across the lake and retrieve the disc. 40 | However, the ice is slippery, so you won't always move in the direction you intend. 41 | The surface is described using a grid like the following 42 | 43 | SFFF 44 | FHFH 45 | FFFH 46 | HFFG 47 | 48 | S : starting point, safe 49 | F : frozen surface, safe 50 | H : hole, fall to your doom 51 | G : goal, where the frisbee is located 52 | 53 | The episode ends when you reach the goal or fall in a hole. 54 | You receive a reward of 1 if you reach the goal, and zero otherwise. 55 | 56 | """ 57 | 58 | metadata = {'render.modes': ['human', 'ansi']} 59 | 60 | def __init__(self, desc=None, map_name="4x4",is_slippery=True): 61 | if desc is None and map_name is None: 62 | raise ValueError('Must provide either desc or map_name') 63 | elif desc is None: 64 | desc = MAPS[map_name] 65 | self.desc = desc = np.asarray(desc,dtype='c') 66 | self.nrow, self.ncol = nrow, ncol = desc.shape 67 | 68 | nA = 4 69 | nS = nrow * ncol 70 | 71 | isd = np.array(desc == b'S').astype('float64').ravel() 72 | isd /= isd.sum() 73 | 74 | P = {s : {a : [] for a in range(nA)} for s in range(nS)} 75 | 76 | def to_s(row, col): 77 | return row*ncol + col 78 | def inc(row, col, a): 79 | if a==0: # left 80 | col = max(col-1,0) 81 | elif a==1: # down 82 | row = min(row+1,nrow-1) 83 | elif a==2: # right 84 | col = min(col+1,ncol-1) 85 | elif a==3: # up 86 | row = max(row-1,0) 87 | return (row, col) 88 | 89 | for row in range(nrow): 90 | for col in range(ncol): 91 | s = to_s(row, col) 92 | for a in range(4): 93 | li = P[s][a] 94 | letter = desc[row, col] 95 | if letter in b'GH': 96 | li.append((1.0, s, 0, True)) 97 | else: 98 | if is_slippery: 99 | for b in [(a-1)%4, a, (a+1)%4]: 100 | newrow, newcol = inc(row, col, b) 101 | newstate = to_s(newrow, newcol) 102 | newletter = desc[newrow, newcol] 103 | done = bytes(newletter) in b'GH' 104 | rew = float(newletter == b'G') 105 | li.append((0.8 if b==a else 0.1, newstate, rew, done)) 106 | else: 107 | newrow, newcol = inc(row, col, a) 108 | newstate = to_s(newrow, newcol) 109 | newletter = desc[newrow, newcol] 110 | done = bytes(newletter) in b'GH' 111 | rew = float(newletter == b'G') 112 | li.append((1.0, newstate, rew, done)) 113 | 114 | super(FrozenLakeEnv, self).__init__(nS, nA, P, isd) 115 | 116 | def _render(self, mode='human', close=False): 117 | if close: 118 | return 119 | outfile = StringIO() if mode == 'ansi' else sys.stdout 120 | 121 | row, col = self.s // self.ncol, self.s % self.ncol 122 | desc = self.desc.tolist() 123 | desc = [[c.decode('utf-8') for c in line] for line in desc] 124 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 125 | if self.lastaction is not None: 126 | outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction])) 127 | else: 128 | outfile.write("\n") 129 | outfile.write("\n".join(''.join(line) for line in desc)+"\n") 130 | 131 | return outfile 132 | 133 | def make_grader(expected): 134 | boxed_i = [0] 135 | boxed_err = [False] 136 | expected_lines = expected.split("\n") 137 | def checking_print(line): 138 | if boxed_i[0] < len(expected_lines): 139 | expected_line = expected_lines[boxed_i[0]] 140 | else: 141 | expected_line = "[END]" 142 | if expected_line == line: 143 | print(line) 144 | else: 145 | boxed_err[0] = True 146 | print("\x1b[41m", end="") 147 | print(line, end="") 148 | print("\x1b[0m", end="") 149 | print(" *** Expected: \x1b[42m" + 
expected_line + "\x1b[0m") 150 | boxed_i[0] += 1 151 | if boxed_i[0] == len(expected_lines): 152 | print("Test failed" if boxed_err[0] else "Test succeeded") 153 | return checking_print 154 | -------------------------------------------------------------------------------- /labs/lab3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3.pdf -------------------------------------------------------------------------------- /labs/lab3/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.sh" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /labs/lab3/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.sh" 3000 1) 4 | viskit_port=$("$DIR/findport.sh" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /labs/lab3/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - 
cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /labs/lab3/findport.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Please run as root. 4 | # Usage: bash findport.sh 3000 100 5 | # 6 | 7 | 8 | if [[ -z "$1" || -z "$2" ]]; then 9 | echo "Usage: $0 " 10 | exit 1 11 | fi 12 | 13 | 14 | BASE=$1 15 | INCREMENT=$2 16 | 17 | port=$BASE 18 | isfree=$(netstat -aln | grep $port) 19 | 20 | while [[ -n "$isfree" ]]; do 21 | port=$[port+INCREMENT] 22 | isfree=$(netstat -aln | grep $port) 23 | done 24 | 25 | echo "$port" 26 | exit 0 27 | -------------------------------------------------------------------------------- /labs/lab3/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /labs/lab3/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | See README.md for a description of the logging API. 4 | 5 | OFF state corresponds to having Logger.CURRENT == Logger.DEFAULT 6 | ON state is otherwise 7 | 8 | """ 9 | import datetime 10 | from collections import OrderedDict 11 | import os 12 | import sys 13 | import shutil 14 | import os.path as osp 15 | import json 16 | 17 | import dateutil.tz 18 | 19 | LOG_OUTPUT_FORMATS = ['stdout', 'log', 'json'] 20 | 21 | DEBUG = 10 22 | INFO = 20 23 | WARN = 30 24 | ERROR = 40 25 | 26 | DISABLED = 50 27 | 28 | 29 | class OutputFormat(object): 30 | def writekvs(self, kvs): 31 | """ 32 | Write key-value pairs 33 | """ 34 | raise NotImplementedError 35 | 36 | def writeseq(self, args): 37 | """ 38 | Write a sequence of other data (e.g. a logging message) 39 | """ 40 | pass 41 | 42 | def close(self): 43 | return 44 | 45 | 46 | class HumanOutputFormat(OutputFormat): 47 | def __init__(self, file): 48 | self.file = file 49 | 50 | def writekvs(self, kvs): 51 | # Create strings for printing 52 | key2str = OrderedDict() 53 | for (key, val) in kvs.items(): 54 | valstr = '%-8.5g' % (val,) if hasattr(val, '__float__') else val 55 | key2str[self._truncate(key)] = self._truncate(valstr) 56 | 57 | # Find max widths 58 | keywidth = max(map(len, key2str.keys())) 59 | valwidth = max(map(len, key2str.values())) 60 | 61 | # Write out the data 62 | dashes = '-' * (keywidth + valwidth + 7) 63 | lines = [dashes] 64 | for (key, val) in key2str.items(): 65 | lines.append('| %s%s | %s%s |' % ( 66 | key, 67 | ' ' * (keywidth - len(key)), 68 | val, 69 | ' ' * (valwidth - len(val)), 70 | )) 71 | lines.append(dashes) 72 | self.file.write('\n'.join(lines) + '\n') 73 | 74 | # Flush the output to the file 75 | self.file.flush() 76 | 77 | def _truncate(self, s): 78 | return s[:20] + '...' 
if len(s) > 23 else s 79 | 80 | def writeseq(self, args): 81 | for arg in args: 82 | self.file.write(arg) 83 | self.file.write('\n') 84 | self.file.flush() 85 | 86 | 87 | class JSONOutputFormat(OutputFormat): 88 | def __init__(self, file): 89 | self.file = file 90 | 91 | def writekvs(self, kvs): 92 | for k, v in kvs.items(): 93 | if hasattr(v, 'dtype'): 94 | v = v.tolist() 95 | kvs[k] = float(v) 96 | self.file.write(json.dumps(kvs) + '\n') 97 | self.file.flush() 98 | 99 | 100 | def make_output_format(format, ev_dir): 101 | os.makedirs(ev_dir, exist_ok=True) 102 | if format == 'stdout': 103 | return HumanOutputFormat(sys.stdout) 104 | elif format == 'log': 105 | log_file = open(osp.join(ev_dir, 'log.txt'), 'at') 106 | return HumanOutputFormat(log_file) 107 | elif format == 'json': 108 | json_file = open(osp.join(ev_dir, 'progress.json'), 'at') 109 | return JSONOutputFormat(json_file) 110 | else: 111 | raise ValueError('Unknown format specified: %s' % (format,)) 112 | 113 | 114 | # ================================================================ 115 | # API 116 | # ================================================================ 117 | 118 | 119 | def logkv(key, val): 120 | """ 121 | Log a value of some diagnostic 122 | Call this once for each diagnostic quantity, each iteration 123 | """ 124 | Logger.CURRENT.logkv(key, val) 125 | 126 | 127 | def dumpkvs(): 128 | """ 129 | Write all of the diagnostics from the current iteration 130 | 131 | level: int. (see old_logger.py docs) If the global logger level is higher than 132 | the level argument here, don't print to stdout. 133 | """ 134 | Logger.CURRENT.dumpkvs() 135 | 136 | 137 | # for backwards compatibility 138 | record_tabular = logkv 139 | dump_tabular = dumpkvs 140 | 141 | 142 | def log(*args, level=INFO): 143 | """ 144 | Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). 145 | """ 146 | Logger.CURRENT.log(*args, level=level) 147 | 148 | 149 | def debug(*args): 150 | log(*args, level=DEBUG) 151 | 152 | 153 | def info(*args): 154 | log(*args, level=INFO) 155 | 156 | 157 | def warn(*args): 158 | log(*args, level=WARN) 159 | 160 | 161 | def error(*args): 162 | log(*args, level=ERROR) 163 | 164 | 165 | def set_level(level): 166 | """ 167 | Set logging threshold on current logger. 168 | """ 169 | Logger.CURRENT.set_level(level) 170 | 171 | 172 | def get_level(): 173 | """ 174 | Set logging threshold on current logger. 175 | """ 176 | return Logger.CURRENT.level 177 | 178 | 179 | def get_dir(): 180 | """ 181 | Get directory that log files are being written to. 182 | will be None if there is no output directory (i.e., if you didn't call start) 183 | """ 184 | return Logger.CURRENT.get_dir() 185 | 186 | 187 | def get_expt_dir(): 188 | sys.stderr.write( 189 | "get_expt_dir() is Deprecated. Switch to get_dir() [%s]\n" % (get_dir(),)) 190 | return get_dir() 191 | 192 | 193 | # ================================================================ 194 | # Backend 195 | # ================================================================ 196 | 197 | 198 | class Logger(object): 199 | # A logger with no output files. 
(See right below class definition) 200 | DEFAULT = None 201 | # So that you can still log to the terminal without setting up any output files 202 | CURRENT = None # Current logger being used by the free functions above 203 | 204 | def __init__(self, dir, output_formats): 205 | self.name2val = OrderedDict() # values this iteration 206 | self.level = INFO 207 | self.dir = dir 208 | self.output_formats = output_formats 209 | 210 | # Logging API, forwarded 211 | # ---------------------------------------- 212 | def logkv(self, key, val): 213 | self.name2val[key] = val 214 | 215 | def dumpkvs(self): 216 | for fmt in self.output_formats: 217 | fmt.writekvs(self.name2val) 218 | self.name2val.clear() 219 | 220 | def log(self, *args, level=INFO): 221 | now = datetime.datetime.now(dateutil.tz.tzlocal()) 222 | timestamp = now.strftime('[%Y-%m-%d %H:%M:%S.%f %Z] ') 223 | if self.level <= level: 224 | self._do_log((timestamp,) + args) 225 | 226 | # Configuration 227 | # ---------------------------------------- 228 | def set_level(self, level): 229 | self.level = level 230 | 231 | def get_dir(self): 232 | return self.dir 233 | 234 | def close(self): 235 | for fmt in self.output_formats: 236 | fmt.close() 237 | 238 | # Misc 239 | # ---------------------------------------- 240 | def _do_log(self, args): 241 | for fmt in self.output_formats: 242 | fmt.writeseq(args) 243 | 244 | 245 | # ================================================================ 246 | 247 | Logger.DEFAULT = Logger( 248 | output_formats=[HumanOutputFormat(sys.stdout)], dir=None) 249 | Logger.CURRENT = Logger.DEFAULT 250 | 251 | 252 | class session(object): 253 | """ 254 | Context manager that sets up the loggers for an experiment. 255 | """ 256 | 257 | CURRENT = None # Set to a LoggerContext object using enter/exit or context manager 258 | 259 | def __init__(self, dir, format_strs=None): 260 | self.dir = dir 261 | if format_strs is None: 262 | format_strs = LOG_OUTPUT_FORMATS 263 | output_formats = [make_output_format(f, dir) for f in format_strs] 264 | Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) 265 | 266 | def __enter__(self): 267 | os.makedirs(self.evaluation_dir(), exist_ok=True) 268 | output_formats = [make_output_format( 269 | f, self.evaluation_dir()) for f in LOG_OUTPUT_FORMATS] 270 | Logger.CURRENT = Logger(dir=self.dir, output_formats=output_formats) 271 | 272 | def __exit__(self, *args): 273 | Logger.CURRENT.close() 274 | Logger.CURRENT = Logger.DEFAULT 275 | 276 | def evaluation_dir(self): 277 | return self.dir 278 | 279 | 280 | # ================================================================ 281 | 282 | 283 | def _demo(): 284 | info("hi") 285 | debug("shouldn't appear") 286 | set_level(DEBUG) 287 | debug("should appear") 288 | dir = "/tmp/testlogging" 289 | if os.path.exists(dir): 290 | shutil.rmtree(dir) 291 | with session(dir=dir): 292 | record_tabular("a", 3) 293 | record_tabular("b", 2.5) 294 | dump_tabular() 295 | record_tabular("b", -2.5) 296 | record_tabular("a", 5.5) 297 | dump_tabular() 298 | info("^^^ should see a = 5.5") 299 | 300 | record_tabular("b", -2.5) 301 | dump_tabular() 302 | 303 | record_tabular("a", "longasslongasslongasslongasslongasslongassvalue") 304 | dump_tabular() 305 | 306 | 307 | if __name__ == "__main__": 308 | _demo() 309 | -------------------------------------------------------------------------------- /labs/lab3/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is 
installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /labs/lab3/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import roboschool 6 | import gym 7 | import chainer 8 | env = gym.make('CartPole-v0') 9 | env.reset() 10 | env.step(env.action_space.sample()) 11 | env = gym.make('RoboschoolHalfCheetah-v1') 12 | env.reset() 13 | env.step(env.action_space.sample()) 14 | print("Your environment has been successfully set up!") 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /labs/lab3/simpledqn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3/simpledqn/__init__.py -------------------------------------------------------------------------------- /labs/lab3/simpledqn/gridworld_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from six import StringIO, b 4 | 5 | from gym import utils 6 | from gym.envs.toy_text import discrete 7 | from gym.envs.registration import register 8 | 9 | LEFT = 0 10 | DOWN = 1 11 | RIGHT = 2 12 | UP = 3 13 | 14 | MAPS = { 15 | "4x4": [ 16 | "SFFF", 17 | "FFFH", 18 | "FFFF", 19 | "HFFG" 20 | ], 21 | "8x8": [ 22 | "SFFFFFFF", 23 | "FFFFFFFF", 24 | "FFFHFFFF", 25 | "FFFFFHFF", 26 | "FFFHFFFF", 27 | "FHHFFFHF", 28 | "FHFFHFHF", 29 | "FFFHFFFG" 30 | ], 31 | "9x9": [ 32 | "HFFFFFFFH", 33 | "FFFFFFFFF", 34 | "FFFFFFFFF", 35 | "FFFFFFFFF", 36 | "FFFFSFFFF", 37 | "FFFFFFFFF", 38 | "FFFFFFFFF", 39 | "FFFFFFFFF", 40 | "HFFFFFFFH" 41 | ] 42 | } 43 | 44 | 45 | def to_one_hot(x, len): 46 | one_hot = np.zeros(len) 47 | one_hot[x] = 1 48 | return one_hot 49 | 50 | 51 
| class GridWorld(discrete.DiscreteEnv): 52 | """ 53 | Winter is here. You and your friends were tossing around a frisbee at the park 54 | when you made a wild throw that left the frisbee out in the middle of the lake. 55 | The water is mostly frozen, but there are a few holes where the ice has melted. 56 | If you step into one of those holes, you'll fall into the freezing water. 57 | At this time, there's an international frisbee shortage, so it's absolutely imperative that 58 | you navigate across the lake and retrieve the disc. 59 | However, the ice is slippery, so you won't always move in the direction you intend. 60 | The surface is described using a grid like the following 61 | 62 | SFFF 63 | FHFH 64 | FFFH 65 | HFFG 66 | 67 | S : starting point, safe 68 | F : frozen surface, safe 69 | H : hole, fall to your doom 70 | G : goal, where the frisbee is located 71 | 72 | The episode ends when you reach the goal or fall in a hole. 73 | You receive a reward of 1 if you reach the goal, and zero otherwise. 74 | 75 | """ 76 | 77 | metadata = {'render.modes': ['human', 'ansi']} 78 | 79 | def __init__(self, desc=None, map_name="4x4", is_slippery=False): 80 | if desc is None and map_name is None: 81 | raise ValueError('Must provide either desc or map_name') 82 | elif desc is None: 83 | desc = MAPS[map_name] 84 | self.desc = desc = np.asarray(desc, dtype='c') 85 | self.nrow, self.ncol = nrow, ncol = desc.shape 86 | 87 | nA = 4 88 | nS = nrow * ncol 89 | 90 | isd = np.array(desc == b'S').astype('float64').ravel() 91 | isd /= isd.sum() 92 | 93 | P = {s: {a: [] for a in range(nA)} for s in range(nS)} 94 | 95 | def to_s(row, col): 96 | return row * ncol + col 97 | 98 | def inc(row, col, a): 99 | if a == 0: # left 100 | col = max(col - 1, 0) 101 | elif a == 1: # down 102 | row = min(row + 1, nrow - 1) 103 | elif a == 2: # right 104 | col = min(col + 1, ncol - 1) 105 | elif a == 3: # up 106 | row = max(row - 1, 0) 107 | return (row, col) 108 | 109 | for row in range(nrow): 110 | for col in range(ncol): 111 | s = to_s(row, col) 112 | for a in range(4): 113 | li = P[s][a] 114 | letter = desc[row, col] 115 | if letter in b'GH': 116 | li.append((1.0, s, 0, True)) 117 | else: 118 | if is_slippery: 119 | for b in [(a - 1) % 4, a, (a + 1) % 4]: 120 | newrow, newcol = inc(row, col, b) 121 | newstate = to_s(newrow, newcol) 122 | newletter = desc[newrow, newcol] 123 | done = bytes(newletter) in b'GH' 124 | if newletter == b'G': 125 | rew = 1.0 126 | elif newletter == b'H': 127 | rew = .0 128 | else: 129 | rew = 0. 130 | # rew = float(newletter == b'G') 131 | li.append((1.0 / 3.0, newstate, rew, done)) 132 | else: 133 | newrow, newcol = inc(row, col, a) 134 | newstate = to_s(newrow, newcol) 135 | newletter = desc[newrow, newcol] 136 | done = bytes(newletter) in b'GH' 137 | # rew = float(newletter == b'G') 138 | if newletter == b'G': 139 | rew = 1.0 140 | elif newletter == b'H': 141 | rew = 0. 142 | else: 143 | rew = 0. 
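                            # Clarifying note: in both the slippery and the
                            # deterministic branch, rew is 1.0 only when the
                            # successor cell is the goal 'G'; holes 'H' and
                            # frozen cells 'F' both yield 0, so an episode's
                            # return is 1 exactly when it ends at the goal.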
144 | li.append((1.0, newstate, rew, done)) 145 | 146 | super(GridWorld, self).__init__(nS, nA, P, isd) 147 | 148 | def _reset(self): 149 | s = super(GridWorld, self)._reset() 150 | return to_one_hot(s, self.nS) 151 | 152 | def _step(self, a): 153 | s, r, d, p = super(GridWorld, self)._step(a) 154 | return to_one_hot(s, self.nS), r, d, p 155 | 156 | def print_obs(self, obs): 157 | import copy 158 | map = copy.deepcopy(self.desc).astype(str) 159 | _obs = int(np.where(obs == 1)[0][0]) 160 | map[_obs // 9, _obs % 9] = 'X' 161 | for row in map: 162 | print(row) 163 | 164 | def _render(self, mode='human', close=False): 165 | if close: 166 | return 167 | outfile = StringIO() if mode == 'ansi' else sys.stdout 168 | 169 | row, col = self.s // self.ncol, self.s % self.ncol 170 | desc = self.desc.tolist() 171 | desc = [[c.decode('utf-8') for c in line] for line in desc] 172 | desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) 173 | if self.lastaction is not None: 174 | outfile.write(" ({})\n".format( 175 | ["Left", "Down", "Right", "Up"][self.lastaction])) 176 | else: 177 | outfile.write("\n") 178 | outfile.write("\n".join(''.join(line) for line in desc) + "\n") 179 | 180 | if mode != 'human': 181 | return outfile 182 | 183 | 184 | register( 185 | 'GridWorld-v0', 186 | entry_point='simpledqn.gridworld_env:GridWorld', 187 | timestep_limit=40, 188 | ) 189 | -------------------------------------------------------------------------------- /labs/lab3/simpledqn/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pickle 4 | 5 | 6 | class ReplayBuffer(object): 7 | def __init__(self, max_size): 8 | """Simple replay buffer for storing sampled DQN (s, a, s', r) transitions as tuples. 9 | 10 | :param size: Maximum size of the replay buffer. 11 | """ 12 | self._buffer = [] 13 | self._max_size = max_size 14 | self._idx = 0 15 | 16 | def __len__(self): 17 | return len(self._buffer) 18 | 19 | def add(self, obs_t, act, rew, obs_tp1, done): 20 | """ 21 | Add a new sample to the replay buffer. 22 | :param obs_t: observation at time t 23 | :param act: action 24 | :param rew: reward 25 | :param obs_tp1: observation at time t+1 26 | :param done: termination signal (whether episode has finished or not) 27 | """ 28 | data = (obs_t, act, rew, obs_tp1, done) 29 | if self._idx >= len(self._buffer): 30 | self._buffer.append(data) 31 | else: 32 | self._buffer[self._idx] = data 33 | self._idx = (self._idx + 1) % self._max_size 34 | 35 | def _encode_sample(self, idxes): 36 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], [] 37 | for i in idxes: 38 | data = self._buffer[i] 39 | obs_t, action, reward, obs_tp1, done = data 40 | obses_t.append(np.array(obs_t, copy=False)) 41 | actions.append(np.array(action, copy=False)) 42 | rewards.append(reward) 43 | obses_tp1.append(np.array(obs_tp1, copy=False)) 44 | dones.append(done) 45 | return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones) 46 | 47 | def sample(self, batch_size): 48 | """Sample a batch of transition tuples. 49 | 50 | :param batch_size: Number of sampled transition tuples. 51 | :return: Tuple of transitions. 52 | """ 53 | idxes = [random.randint(0, len(self._buffer) - 1) 54 | for _ in range(batch_size)] 55 | return self._encode_sample(idxes) 56 | 57 | def dump(self, file_path=None): 58 | """Dump the replay buffer into a file. 
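        A minimal usage sketch (hypothetical sizes and paths):

            buf = ReplayBuffer(max_size=10000)
            buf.add(obs_t, act, rew, obs_tp1, done)
            buf.dump('/tmp/replay.pkl')
            buf.load('/tmp/replay.pkl')  # restores the stored transitions

        :param file_path: Path of the pickle file to write.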
59 | """ 60 | file = open(file_path, 'wb') 61 | pickle.dump(self._buffer, file, -1) 62 | file.close() 63 | 64 | def load(self, file_path=None): 65 | """Load the replay buffer from a file 66 | """ 67 | file = open(file_path, 'rb') 68 | self._buffer = pickle.load(file) 69 | file.close() 70 | -------------------------------------------------------------------------------- /labs/lab3/simpledqn/replay_buffer_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3/simpledqn/replay_buffer_warm_start.pkl -------------------------------------------------------------------------------- /labs/lab3/simpledqn/simple_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.special 3 | import chainer 4 | 5 | 6 | # Compute gradient approximately using finite difference 7 | def numerical_grad(f, x, eps=1e-8): 8 | grad = np.zeros_like(x) 9 | for i in range(len(x)): 10 | xplus = np.array(x) 11 | xplus[i] += eps 12 | fplus = f(xplus) 13 | xminus = np.array(x) 14 | xminus[i] -= eps 15 | fminus = f(xminus) 16 | grad[i] = (fplus - fminus) / (2 * eps) 17 | return grad 18 | 19 | 20 | def gradient_check(f, g, x): 21 | # Test the implementation of g(x) = df/dx 22 | # Perform numerical differentiation and test it 23 | g_num = numerical_grad(f, x) 24 | g_test = g(x) 25 | try: 26 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 27 | print("Gradient check passed!") 28 | except AssertionError as e: 29 | print(e) 30 | print("Warning: Gradient check didn't pass!") 31 | 32 | 33 | def log_softmax(logits): 34 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 35 | 36 | 37 | def softmax(logits): 38 | x = logits 39 | x = x - np.max(x, axis=-1, keepdims=True) 40 | x = np.exp(x) 41 | return x / np.sum(x, axis=-1, keepdims=True) 42 | 43 | 44 | def weighted_sample(logits, rng=np.random): 45 | weights = softmax(logits) 46 | return min( 47 | int(np.sum(rng.uniform() > np.cumsum(weights))), 48 | len(weights) - 1 49 | ) 50 | 51 | 52 | def include_bias(x): 53 | # Add a constant term (1.0) to each entry in x 54 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 55 | 56 | 57 | _tested = set() 58 | 59 | nprs = np.random.RandomState 60 | 61 | 62 | def assert_allclose(a, b): 63 | if isinstance(a, (np.ndarray, float, int)): 64 | np.testing.assert_allclose(a, b) 65 | elif isinstance(a, (tuple, list)): 66 | assert isinstance(b, (tuple, list)) 67 | assert len(a) == len(b) 68 | for a_i, b_i in zip(a, b): 69 | assert_allclose(a_i, b_i) 70 | elif isinstance(a, chainer.Variable): 71 | assert isinstance(b, chainer.Variable) 72 | assert_allclose(a.data, b.data) 73 | else: 74 | raise NotImplementedError 75 | 76 | 77 | def test_once(fn, kwargs, desired_output=None): 78 | if fn.__name__ in _tested: 79 | return 80 | _tested.add(fn.__name__) 81 | 82 | if callable(kwargs): 83 | kwargs = kwargs() 84 | 85 | if callable(desired_output): 86 | desired_output = desired_output() 87 | 88 | if desired_output is None: 89 | print("Desired output for %s:" % (fn.__name__), repr(fn(**kwargs))) 90 | exit() 91 | else: 92 | try: 93 | output = fn(**kwargs) 94 | assert_allclose(desired_output, output) 95 | print("Test for %s passed!" % (fn.__name__)) 96 | except AssertionError as e: 97 | print(e) 98 | print("Warning: test for %s didn't pass!" 
% (fn.__name__)) 99 | -------------------------------------------------------------------------------- /labs/lab3/simpledqn/weights_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3/simpledqn/weights_warm_start.pkl -------------------------------------------------------------------------------- /labs/lab3/simpledqn/wrappers.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | 5 | class NoopResetEnv(gym.Wrapper): 6 | def __init__(self, env=None, noop_max=30): 7 | """Sample initial states by taking random number of no-ops on reset. 8 | No-op is assumed to be action 0. 9 | """ 10 | super(NoopResetEnv, self).__init__(env) 11 | self.noop_max = noop_max 12 | self.override_num_noops = None 13 | assert env.unwrapped.get_action_meanings()[0] == 'NOOP' 14 | 15 | def _reset(self): 16 | """ Do no-op action for a number of steps in [1, noop_max].""" 17 | self.env.reset() 18 | if self.override_num_noops is not None: 19 | noops = self.override_num_noops 20 | else: 21 | noops = np.random.randint(1, self.noop_max + 1) 22 | assert noops > 0 23 | obs = None 24 | for _ in range(noops): 25 | obs, _, done, _ = self.env.step(0) 26 | if done: 27 | obs = self.env.reset() 28 | return obs 29 | 30 | 31 | class EpisodicLifeEnv(gym.Wrapper): 32 | def __init__(self, env=None): 33 | """Make end-of-life == end-of-episode, but only reset on true game over. 34 | Done by DeepMind for the DQN and co. since it helps value estimation. 35 | """ 36 | super(EpisodicLifeEnv, self).__init__(env) 37 | self.lives = 0 38 | self.was_real_done = True 39 | self.was_real_reset = False 40 | 41 | def _step(self, action): 42 | obs, reward, done, info = self.env.step(action) 43 | self.was_real_done = done 44 | # check current lives, make loss of life terminal, 45 | # then update lives to handle bonus lives 46 | lives = self.env.unwrapped.ale.lives() 47 | if lives < self.lives and lives > 0: 48 | # for Qbert somtimes we stay in lives == 0 condtion for a few frames 49 | # so its important to keep lives > 0, so that we only reset once 50 | # the environment advertises done. 51 | done = True 52 | self.lives = lives 53 | return obs, reward, done, info 54 | 55 | def _reset(self): 56 | """Reset only when lives are exhausted. 57 | This way all states are still reachable even though lives are episodic, 58 | and the learner need not know about any of this behind-the-scenes. 
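        Concretely (per the implementation below): if the previous done signal
        came from a lost life rather than a true game over, reset is replaced
        by a single no-op step, so play resumes from the current state.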
59 | """ 60 | if self.was_real_done: 61 | obs = self.env.reset() 62 | self.was_real_reset = True 63 | else: 64 | # no-op step to advance from terminal/lost life state 65 | obs, _, _, _ = self.env.step(0) 66 | self.was_real_reset = False 67 | self.lives = self.env.unwrapped.ale.lives() 68 | return obs 69 | -------------------------------------------------------------------------------- /labs/lab3/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab3/viskit/__init__.py -------------------------------------------------------------------------------- /labs/lab3/viskit/core.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import json 4 | import os 5 | 6 | import numpy as np 7 | 8 | 9 | # from sandbox.rocky.utils.py_utils import AttrDict 10 | 11 | class AttrDict(dict): 12 | def __init__(self, *args, **kwargs): 13 | super(AttrDict, self).__init__(*args, **kwargs) 14 | self.__dict__ = self 15 | 16 | 17 | def unique(l): 18 | return list(set(l)) 19 | 20 | 21 | def flatten(l): 22 | return [item for sublist in l for item in sublist] 23 | 24 | 25 | def load_progress(progress_json_path, verbose=True): 26 | if verbose: 27 | print("Reading %s" % progress_json_path) 28 | entries = dict() 29 | rows = [] 30 | with open(progress_json_path, 'r') as f: 31 | lines = f.read().split('\n') 32 | for line in lines: 33 | if len(line) > 0: 34 | row = json.loads(line) 35 | rows.append(row) 36 | all_keys = set(k for row in rows for k in row.keys()) 37 | for k in all_keys: 38 | if k not in entries: 39 | entries[k] = [] 40 | for row in rows: 41 | if k in row: 42 | v = row[k] 43 | try: 44 | entries[k].append(float(v)) 45 | except: 46 | entries[k].append(np.nan) 47 | else: 48 | entries[k].append(np.nan) 49 | 50 | # entries[key] = [row.get(key, np.nan) for row in rows] 51 | # added_keys = set() 52 | # for k, v in row.items(): 53 | # if k not in entries: 54 | # entries[k] = [] 55 | # try: 56 | # entries[k].append(float(v)) 57 | # except: 58 | # entries[k].append(0.) 59 | # added_keys.add(k) 60 | # for k in entries.keys(): 61 | # if k not in added_keys: 62 | # entries[k].append(np.nan) 63 | entries = dict([(k, np.array(v)) for k, v in entries.items()]) 64 | return entries 65 | 66 | 67 | def flatten_dict(d): 68 | flat_params = dict() 69 | for k, v in d.items(): 70 | if isinstance(v, dict): 71 | v = flatten_dict(v) 72 | for subk, subv in flatten_dict(v).items(): 73 | flat_params[k + "." 
+ subk] = subv 74 | else: 75 | flat_params[k] = v 76 | return flat_params 77 | 78 | 79 | def load_params(params_json_path): 80 | with open(params_json_path, 'r') as f: 81 | data = json.loads(f.read()) 82 | if "args_data" in data: 83 | del data["args_data"] 84 | if "exp_name" not in data: 85 | data["exp_name"] = params_json_path.split("/")[-2] 86 | return data 87 | 88 | 89 | def lookup(d, keys): 90 | if not isinstance(keys, list): 91 | keys = keys.split(".") 92 | for k in keys: 93 | if hasattr(d, "__getitem__"): 94 | if k in d: 95 | d = d[k] 96 | else: 97 | return None 98 | else: 99 | return None 100 | return d 101 | 102 | 103 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True): 104 | if isinstance(exp_folder_paths, str): 105 | exp_folder_paths = [exp_folder_paths] 106 | exps = [] 107 | for exp_folder_path in exp_folder_paths: 108 | exps += [x[0] for x in os.walk(exp_folder_path)] 109 | if verbose: 110 | print("finished walking exp folders") 111 | exps_data = [] 112 | for exp in exps: 113 | try: 114 | exp_path = exp 115 | variant_json_path = os.path.join(exp_path, "variant.json") 116 | progress_json_path = os.path.join(exp_path, "progress.json") 117 | progress = load_progress(progress_json_path, verbose=verbose) 118 | try: 119 | params = load_params(variant_json_path) 120 | except IOError: 121 | params = dict(exp_name="experiment") 122 | exps_data.append(AttrDict( 123 | progress=progress, params=params, flat_params=flatten_dict(params))) 124 | except IOError as e: 125 | if verbose: 126 | print(e) 127 | 128 | # a dictionary of all keys and types of values 129 | all_keys = dict() 130 | for data in exps_data: 131 | for key in data.flat_params.keys(): 132 | if key not in all_keys: 133 | all_keys[key] = type(data.flat_params[key]) 134 | 135 | # if any data does not have some key, specify the value of it 136 | if not ignore_missing_keys: 137 | default_values = dict() 138 | for data in exps_data: 139 | for key in sorted(all_keys.keys()): 140 | if key not in data.flat_params: 141 | if key not in default_values: 142 | default = None 143 | default_values[key] = default 144 | data.flat_params[key] = default_values[key] 145 | 146 | return exps_data 147 | 148 | 149 | def smart_repr(x): 150 | if isinstance(x, tuple): 151 | if len(x) == 0: 152 | return "tuple()" 153 | elif len(x) == 1: 154 | return "(%s,)" % smart_repr(x[0]) 155 | else: 156 | return "(" + ",".join(map(smart_repr, x)) + ")" 157 | else: 158 | if hasattr(x, "__call__"): 159 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." 
+ x.__name__) 160 | else: 161 | return repr(x) 162 | 163 | 164 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 165 | try: 166 | stringified_pairs = sorted( 167 | map( 168 | eval, 169 | unique( 170 | flatten( 171 | [ 172 | list( 173 | map( 174 | smart_repr, 175 | list(d.flat_params.items()) 176 | ) 177 | ) 178 | for d in exps_data 179 | ] 180 | ) 181 | ) 182 | ), 183 | key=lambda x: ( 184 | tuple("" if it is None else str(it) for it in x), 185 | ) 186 | ) 187 | except Exception as e: 188 | print(e) 189 | import ipdb 190 | ipdb.set_trace() 191 | proposals = [(k, [x[1] for x in v]) 192 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 193 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 194 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 195 | return filtered 196 | 197 | 198 | class Selector(object): 199 | def __init__(self, exps_data, filters=None, custom_filters=None): 200 | self._exps_data = exps_data 201 | if filters is None: 202 | self._filters = tuple() 203 | else: 204 | self._filters = tuple(filters) 205 | if custom_filters is None: 206 | self._custom_filters = [] 207 | else: 208 | self._custom_filters = custom_filters 209 | 210 | def where(self, k, v): 211 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 212 | 213 | def custom_filter(self, filter): 214 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 215 | 216 | def _check_exp(self, exp): 217 | # or exp.flat_params.get(k, None) is None 218 | return all( 219 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 220 | k not in exp.flat_params)) for k, v in self._filters) 221 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 222 | 223 | def extract(self): 224 | return list(filter(self._check_exp, self._exps_data)) 225 | 226 | def iextract(self): 227 | return filter(self._check_exp, self._exps_data) 228 | 229 | 230 | # Taken from plot.ly 231 | color_defaults = [ 232 | '#1f77b4', # muted blue 233 | '#ff7f0e', # safety orange 234 | '#2ca02c', # cooked asparagus green 235 | '#d62728', # brick red 236 | '#9467bd', # muted purple 237 | '#8c564b', # chestnut brown 238 | '#e377c2', # raspberry yogurt pink 239 | '#7f7f7f', # middle gray 240 | '#bcbd22', # curry yellow-green 241 | '#17becf' # blue-teal 242 | ] 243 | 244 | 245 | def hex_to_rgb(hex, opacity=1.0): 246 | if hex[0] == '#': 247 | hex = hex[1:] 248 | assert (len(hex) == 6) 249 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 250 | -------------------------------------------------------------------------------- /labs/lab3/viskit/static/css/dropdowns-enhancement.css: -------------------------------------------------------------------------------- 1 | .dropdown-menu > li > label { 2 | display: block; 3 | padding: 3px 20px; 4 | clear: both; 5 | font-weight: normal; 6 | line-height: 1.42857143; 7 | color: #333333; 8 | white-space: nowrap; 9 | } 10 | .dropdown-menu > li > label:hover, 11 | .dropdown-menu > li > label:focus { 12 | text-decoration: none; 13 | color: #262626; 14 | background-color: #f5f5f5; 15 | } 16 | .dropdown-menu > li > input:checked ~ label, 17 | .dropdown-menu > li > input:checked ~ label:hover, 18 | .dropdown-menu > li > input:checked ~ label:focus, 19 | .dropdown-menu > .active > label, 20 | .dropdown-menu > .active > label:hover, 21 | .dropdown-menu > .active > label:focus { 22 | color: #ffffff; 23 | text-decoration: 
none; 24 | outline: 0; 25 | background-color: #428bca; 26 | } 27 | .dropdown-menu > li > input[disabled] ~ label, 28 | .dropdown-menu > li > input[disabled] ~ label:hover, 29 | .dropdown-menu > li > input[disabled] ~ label:focus, 30 | .dropdown-menu > .disabled > label, 31 | .dropdown-menu > .disabled > label:hover, 32 | .dropdown-menu > .disabled > label:focus { 33 | color: #999999; 34 | } 35 | .dropdown-menu > li > input[disabled] ~ label:hover, 36 | .dropdown-menu > li > input[disabled] ~ label:focus, 37 | .dropdown-menu > .disabled > label:hover, 38 | .dropdown-menu > .disabled > label:focus { 39 | text-decoration: none; 40 | background-color: transparent; 41 | background-image: none; 42 | filter: progid:DXImageTransform.Microsoft.gradient(enabled = false); 43 | cursor: not-allowed; 44 | } 45 | .dropdown-menu > li > label { 46 | margin-bottom: 0; 47 | cursor: pointer; 48 | } 49 | .dropdown-menu > li > input[type="radio"], 50 | .dropdown-menu > li > input[type="checkbox"] { 51 | display: none; 52 | position: absolute; 53 | top: -9999em; 54 | left: -9999em; 55 | } 56 | .dropdown-menu > li > label:focus, 57 | .dropdown-menu > li > input:focus ~ label { 58 | outline: thin dotted; 59 | outline: 5px auto -webkit-focus-ring-color; 60 | outline-offset: -2px; 61 | } 62 | .dropdown-menu.pull-right { 63 | right: 0; 64 | left: auto; 65 | } 66 | .dropdown-menu.pull-top { 67 | bottom: 100%; 68 | top: auto; 69 | margin: 0 0 2px; 70 | -webkit-box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 71 | box-shadow: 0 -6px 12px rgba(0, 0, 0, 0.175); 72 | } 73 | .dropdown-menu.pull-center { 74 | right: 50%; 75 | left: auto; 76 | } 77 | .dropdown-menu.pull-middle { 78 | right: 100%; 79 | margin: 0 2px 0 0; 80 | box-shadow: -5px 0 10px rgba(0, 0, 0, 0.2); 81 | left: auto; 82 | } 83 | .dropdown-menu.pull-middle.pull-right { 84 | right: auto; 85 | left: 100%; 86 | margin: 0 0 0 2px; 87 | box-shadow: 5px 0 10px rgba(0, 0, 0, 0.2); 88 | } 89 | .dropdown-menu.pull-middle.pull-center { 90 | right: 50%; 91 | margin: 0; 92 | box-shadow: 0 0 10px rgba(0, 0, 0, 0.2); 93 | } 94 | .dropdown-menu.bullet { 95 | margin-top: 8px; 96 | } 97 | .dropdown-menu.bullet:before { 98 | width: 0; 99 | height: 0; 100 | content: ''; 101 | display: inline-block; 102 | position: absolute; 103 | border-color: transparent; 104 | border-style: solid; 105 | -webkit-transform: rotate(360deg); 106 | border-width: 0 7px 7px; 107 | border-bottom-color: #cccccc; 108 | border-bottom-color: rgba(0, 0, 0, 0.15); 109 | top: -7px; 110 | left: 9px; 111 | } 112 | .dropdown-menu.bullet:after { 113 | width: 0; 114 | height: 0; 115 | content: ''; 116 | display: inline-block; 117 | position: absolute; 118 | border-color: transparent; 119 | border-style: solid; 120 | -webkit-transform: rotate(360deg); 121 | border-width: 0 6px 6px; 122 | border-bottom-color: #ffffff; 123 | top: -6px; 124 | left: 10px; 125 | } 126 | .dropdown-menu.bullet.pull-right:before { 127 | left: auto; 128 | right: 9px; 129 | } 130 | .dropdown-menu.bullet.pull-right:after { 131 | left: auto; 132 | right: 10px; 133 | } 134 | .dropdown-menu.bullet.pull-top { 135 | margin-top: 0; 136 | margin-bottom: 8px; 137 | } 138 | .dropdown-menu.bullet.pull-top:before { 139 | top: auto; 140 | bottom: -7px; 141 | border-bottom-width: 0; 142 | border-top-width: 7px; 143 | border-top-color: #cccccc; 144 | border-top-color: rgba(0, 0, 0, 0.15); 145 | } 146 | .dropdown-menu.bullet.pull-top:after { 147 | top: auto; 148 | bottom: -6px; 149 | border-bottom: none; 150 | border-top-width: 6px; 151 | border-top-color: 
#ffffff; 152 | } 153 | .dropdown-menu.bullet.pull-center:before { 154 | left: auto; 155 | right: 50%; 156 | margin-right: -7px; 157 | } 158 | .dropdown-menu.bullet.pull-center:after { 159 | left: auto; 160 | right: 50%; 161 | margin-right: -6px; 162 | } 163 | .dropdown-menu.bullet.pull-middle { 164 | margin-right: 8px; 165 | } 166 | .dropdown-menu.bullet.pull-middle:before { 167 | top: 50%; 168 | left: 100%; 169 | right: auto; 170 | margin-top: -7px; 171 | border-right-width: 0; 172 | border-bottom-color: transparent; 173 | border-top-width: 7px; 174 | border-left-color: #cccccc; 175 | border-left-color: rgba(0, 0, 0, 0.15); 176 | } 177 | .dropdown-menu.bullet.pull-middle:after { 178 | top: 50%; 179 | left: 100%; 180 | right: auto; 181 | margin-top: -6px; 182 | border-right-width: 0; 183 | border-bottom-color: transparent; 184 | border-top-width: 6px; 185 | border-left-color: #ffffff; 186 | } 187 | .dropdown-menu.bullet.pull-middle.pull-right { 188 | margin-right: 0; 189 | margin-left: 8px; 190 | } 191 | .dropdown-menu.bullet.pull-middle.pull-right:before { 192 | left: -7px; 193 | border-left-width: 0; 194 | border-right-width: 7px; 195 | border-right-color: #cccccc; 196 | border-right-color: rgba(0, 0, 0, 0.15); 197 | } 198 | .dropdown-menu.bullet.pull-middle.pull-right:after { 199 | left: -6px; 200 | border-left-width: 0; 201 | border-right-width: 6px; 202 | border-right-color: #ffffff; 203 | } 204 | .dropdown-menu.bullet.pull-middle.pull-center { 205 | margin-left: 0; 206 | margin-right: 0; 207 | } 208 | .dropdown-menu.bullet.pull-middle.pull-center:before { 209 | border: none; 210 | display: none; 211 | } 212 | .dropdown-menu.bullet.pull-middle.pull-center:after { 213 | border: none; 214 | display: none; 215 | } 216 | .dropdown-submenu { 217 | position: relative; 218 | } 219 | .dropdown-submenu > .dropdown-menu { 220 | top: 0; 221 | left: 100%; 222 | margin-top: -6px; 223 | margin-left: -1px; 224 | border-top-left-radius: 0; 225 | } 226 | .dropdown-submenu > a:before { 227 | display: block; 228 | float: right; 229 | width: 0; 230 | height: 0; 231 | content: ""; 232 | margin-top: 6px; 233 | margin-right: -8px; 234 | border-width: 4px 0 4px 4px; 235 | border-style: solid; 236 | border-left-style: dashed; 237 | border-top-color: transparent; 238 | border-bottom-color: transparent; 239 | } 240 | @media (max-width: 767px) { 241 | .navbar-nav .dropdown-submenu > a:before { 242 | margin-top: 8px; 243 | border-color: inherit; 244 | border-style: solid; 245 | border-width: 4px 4px 0; 246 | border-left-color: transparent; 247 | border-right-color: transparent; 248 | } 249 | .navbar-nav .dropdown-submenu > a { 250 | padding-left: 40px; 251 | } 252 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > a, 253 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > label { 254 | padding-left: 35px; 255 | } 256 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > a, 257 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > label { 258 | padding-left: 45px; 259 | } 260 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 261 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 262 | padding-left: 55px; 263 | } 264 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu 
> li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 265 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 266 | padding-left: 65px; 267 | } 268 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > a, 269 | .navbar-nav > .open > .dropdown-menu > .dropdown-submenu > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > .dropdown-menu > li > label { 270 | padding-left: 75px; 271 | } 272 | } 273 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 274 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 275 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 276 | background-color: #e7e7e7; 277 | color: #555555; 278 | } 279 | @media (max-width: 767px) { 280 | .navbar-default .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 281 | border-top-color: #555555; 282 | } 283 | } 284 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a, 285 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:hover, 286 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:focus { 287 | background-color: #080808; 288 | color: #ffffff; 289 | } 290 | @media (max-width: 767px) { 291 | .navbar-inverse .navbar-nav .open > .dropdown-menu > .dropdown-submenu.open > a:before { 292 | border-top-color: #ffffff; 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /labs/lab4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4.pdf -------------------------------------------------------------------------------- /labs/lab4/alg_utils.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | 3 | 4 | # ============================== 5 | # Shared utilities 6 | # ============================== 7 | 8 | def compute_cumulative_returns(rewards, baselines, discount): 9 | # This method builds up the cumulative sum of discounted rewards for each time step: 10 | # R[t] = sum_{t'>=t} γ^(t'-t)*r_t' 11 | # Note that we use γ^(t'-t) instead of γ^t'. 
This gives us a biased gradient but lower variance 12 | returns = [] 13 | # Use the last baseline prediction to back up 14 | cum_return = baselines[-1] 15 | for reward in rewards[::-1]: 16 | cum_return = cum_return * discount + reward 17 | returns.append(cum_return) 18 | return returns[::-1] 19 | 20 | 21 | def compute_advantages(rewards, baselines, discount, gae_lambda): 22 | # Given rewards r_t and baselines b(s_t), compute the (generalized) advantage estimate A_t 23 | deltas = rewards + discount * baselines[1:] - baselines[:-1] 24 | advs = [] 25 | cum_adv = 0 26 | multiplier = discount * gae_lambda 27 | for delta in deltas[::-1]: 28 | cum_adv = cum_adv * multiplier + delta 29 | advs.append(cum_adv) 30 | return advs[::-1] 31 | 32 | 33 | def compute_pg_vars(trajs, policy, baseline, discount, gae_lambda): 34 | """ 35 | Compute chainer variables needed for various policy gradient algorithms 36 | """ 37 | for traj in trajs: 38 | # Include the last observation here, in case the trajectory is not finished 39 | baselines = baseline.predict(np.concatenate( 40 | [traj["observations"], [traj["last_observation"]]])) 41 | if traj['finished']: 42 | # If already finished, the future cumulative reward starting from the final state is 0 43 | baselines[-1] = 0. 44 | # This is useful when fitting baselines. It uses the baseline prediction of the last state value to perform a 45 | # Bellman backup if the trajectory is not finished. 46 | traj['returns'] = compute_cumulative_returns( 47 | traj['rewards'], baselines, discount) 48 | traj['advantages'] = compute_advantages( 49 | traj['rewards'], baselines, discount, gae_lambda) 50 | traj['baselines'] = baselines[:-1] 51 | 52 | # First, we compute a flattened list of observations, actions, and advantages 53 | all_obs = np.concatenate([traj['observations'] for traj in trajs], axis=0) 54 | all_acts = np.concatenate([traj['actions'] for traj in trajs], axis=0) 55 | all_advs = np.concatenate([traj['advantages'] for traj in trajs], axis=0) 56 | all_dists = { 57 | k: np.concatenate([traj['distributions'][k] for traj in trajs], axis=0) 58 | for k in trajs[0]['distributions'].keys() 59 | } 60 | 61 | # Normalizing the advantage values can make the algorithm more robust to reward scaling 62 | all_advs = (all_advs - np.mean(all_advs)) / (np.std(all_advs) + 1e-8) 63 | 64 | # Form chainer variables 65 | all_obs = Variable(all_obs) 66 | all_acts = Variable(all_acts) 67 | all_advs = Variable(all_advs.astype(np.float32, copy=False)) 68 | all_dists = policy.distribution.from_dict( 69 | {k: Variable(v) for k, v in all_dists.items()}) 70 | 71 | return all_obs, all_acts, all_advs, all_dists 72 | 73 | 74 | # ============================== 75 | # Helper methods for logging 76 | # ============================== 77 | 78 | def log_reward_statistics(env): 79 | # keep unwrapping until we get the Monitor wrapper 80 | while not isinstance(env, gym.wrappers.Monitor): 81 | if not isinstance(env, gym.Wrapper): 82 | assert False, "No Monitor wrapper found around the environment" 83 | env = env.env 84 | 85 | assert isinstance(env, gym.wrappers.Monitor) 86 | all_stats = None 87 | for _ in range(10): 88 | try: 89 | all_stats = gym.wrappers.monitoring.load_results(env.directory) 90 | except FileNotFoundError: 91 | time.sleep(1) 92 | continue 93 | if all_stats is not None: 94 | episode_rewards = all_stats['episode_rewards'] 95 | episode_lengths = all_stats['episode_lengths'] 96 | 97 | recent_episode_rewards = episode_rewards[-100:] 98 | recent_episode_lengths = episode_lengths[-100:] 99 | 100 | if len(recent_episode_rewards) 
> 0: 101 | logger.logkv('AverageReturn', np.mean(recent_episode_rewards)) 102 | logger.logkv('MinReturn', np.min(recent_episode_rewards)) 103 | logger.logkv('MaxReturn', np.max(recent_episode_rewards)) 104 | logger.logkv('StdReturn', np.std(recent_episode_rewards)) 105 | logger.logkv('AverageEpisodeLength', 106 | np.mean(recent_episode_lengths)) 107 | logger.logkv('MinEpisodeLength', np.min(recent_episode_lengths)) 108 | logger.logkv('MaxEpisodeLength', np.max(recent_episode_lengths)) 109 | logger.logkv('StdEpisodeLength', np.std(recent_episode_lengths)) 110 | 111 | logger.logkv('TotalNEpisodes', len(episode_rewards)) 112 | logger.logkv('TotalNSamples', np.sum(episode_lengths)) 113 | 114 | 115 | def log_baseline_statistics(trajs): 116 | # Specifically, compute the explained variance of the baseline predictions, defined as 1 - Var[returns - baselines] / Var[returns] 117 | baselines = np.concatenate([traj['baselines'] for traj in trajs]) 118 | returns = np.concatenate([traj['returns'] for traj in trajs]) 119 | logger.logkv('ExplainedVariance', 120 | explained_variance_1d(baselines, returns)) 121 | 122 | 123 | def log_action_distribution_statistics(dists): 124 | with chainer.no_backprop_mode(): 125 | entropy = F.mean(dists.entropy()).data 126 | logger.logkv('Entropy', entropy) 127 | logger.logkv('Perplexity', np.exp(entropy)) 128 | if isinstance(dists, Gaussian): 129 | logger.logkv('AveragePolicyStd', F.mean( 130 | F.exp(dists.log_stds)).data) 131 | for idx in range(dists.log_stds.shape[-1]): 132 | logger.logkv('AveragePolicyStd[{}]'.format( 133 | idx), F.mean(F.exp(dists.log_stds[..., idx])).data) 134 | elif isinstance(dists, Categorical): 135 | probs = F.mean(F.softmax(dists.logits), axis=0).data 136 | for idx in range(len(probs)): 137 | logger.logkv('AveragePolicyProb[{}]'.format(idx), probs[idx]) 138 | -------------------------------------------------------------------------------- /labs/lab4/algs.py: -------------------------------------------------------------------------------- 1 | from pg import pg 2 | from trpo import trpo 3 | from a2c import a2c 4 | -------------------------------------------------------------------------------- /labs/lab4/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost is not on the PATH, fall back to the XQuartz binary 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 
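# Neither an xhost on the PATH nor the XQuartz binary exists, so X11 forwarding cannot be set up; give up.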
17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /labs/lab4/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /labs/lab4/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_a2c_breakout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import a2c 3 | from env_makers import EnvMaker 4 | from models import CategoricalCNNPolicy 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/a2c-breakout" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('BreakoutNoFrameskip-v4') 19 | env = env_maker.make() 20 | policy = CategoricalCNNPolicy( 21 | env.observation_space, env.action_space, env.spec) 22 | vf = policy.create_vf() 23 | a2c( 24 | env=env, 25 | env_maker=env_maker, 26 | n_envs=16, 27 | policy=policy, 28 | vf=vf, 29 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 30 | ) 31 | -------------------------------------------------------------------------------- 
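The launcher above is the template for every A2C experiment in this lab: only the environment id and the log directory change. A minimal sketch of adapting it to another Atari game (assuming the same `a2c`, `EnvMaker`, `CategoricalCNNPolicy`, and `SnapshotSaver` interfaces shown above; the env id `SeaquestNoFrameskip-v4` and the log directory are only illustrative choices, not a script that ships with the lab):

    #!/usr/bin/env python
    # Hypothetical variant of run_a2c_breakout.py for a different Atari environment.
    from algs import a2c
    from env_makers import EnvMaker
    from models import CategoricalCNNPolicy
    from utils import SnapshotSaver
    import numpy as np
    import os
    import logger

    log_dir = "data/local/a2c-seaquest"  # illustrative log directory
    np.random.seed(42)
    os.system("rm -rf {}".format(log_dir))  # clean up existing logs, as in the scripts above

    with logger.session(log_dir):
        env_maker = EnvMaker('SeaquestNoFrameskip-v4')  # assumed: any Atari NoFrameskip id registered in gym
        env = env_maker.make()
        policy = CategoricalCNNPolicy(env.observation_space, env.action_space, env.spec)
        vf = policy.create_vf()
        a2c(env=env, env_maker=env_maker, n_envs=16, policy=policy, vf=vf,
            snapshot_saver=SnapshotSaver(log_dir, interval=10))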
/labs/lab4/experiments/run_a2c_pong.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import a2c 3 | from env_makers import EnvMaker 4 | from models import CategoricalCNNPolicy 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/a2c-pong" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('PongNoFrameskip-v4') 19 | env = env_maker.make() 20 | policy = CategoricalCNNPolicy( 21 | env.observation_space, env.action_space, env.spec) 22 | vf = policy.create_vf() 23 | a2c( 24 | env=env, 25 | env_maker=env_maker, 26 | n_envs=16, 27 | policy=policy, 28 | vf=vf, 29 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 30 | ) 31 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_a2c_pong_warm_start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import numpy as np 4 | import os 5 | import logger 6 | import pickle 7 | 8 | log_dir = "data/local/a2c-pong-warm-start" 9 | 10 | np.random.seed(42) 11 | 12 | # Clean up existing logs 13 | os.system("rm -rf {}".format(log_dir)) 14 | 15 | with logger.session(log_dir): 16 | with open("pong_warm_start.pkl", "rb") as f: 17 | state = pickle.load(f) 18 | saver = SnapshotSaver(log_dir, interval=10) 19 | alg_state = state['alg_state'] 20 | env = alg_state['env_maker'].make() 21 | alg = state['alg'] 22 | alg(env=env, snapshot_saver=saver, **alg_state) 23 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_pg_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import chainer 3 | 4 | from algs import pg 5 | from env_makers import EnvMaker 6 | from models import CategoricalMLPPolicy, MLPBaseline 7 | from utils import SnapshotSaver 8 | import numpy as np 9 | import os 10 | import logger 11 | 12 | log_dir = "data/local/pg-cartpole" 13 | 14 | np.random.seed(42) 15 | 16 | # Clean up existing logs 17 | os.system("rm -rf {}".format(log_dir)) 18 | 19 | with logger.session(log_dir): 20 | env_maker = EnvMaker('CartPole-v0') 21 | env = env_maker.make() 22 | policy = CategoricalMLPPolicy(observation_space=env.observation_space, action_space=env.action_space, 23 | env_spec=env.spec) 24 | baseline = MLPBaseline(observation_space=env.observation_space, action_space=env.action_space, 25 | env_spec=env.spec) 26 | pg( 27 | env=env, 28 | env_maker=env_maker, 29 | n_envs=16, 30 | policy=policy, 31 | baseline=baseline, 32 | batch_size=2000, 33 | n_iters=100, 34 | snapshot_saver=SnapshotSaver(log_dir), 35 | optimizer=chainer.optimizers.Adam(1e-2) 36 | ) 37 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import trpo 3 | from env_makers import EnvMaker 4 | from models import CategoricalMLPPolicy, MLPBaseline 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/trpo-cartpole" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 
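# Note: the rm -rf above wipes any previous results under log_dir, so the snapshot saver below always starts from a clean directory.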
16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('CartPole-v0') 19 | env = env_maker.make() 20 | policy = CategoricalMLPPolicy( 21 | observation_space=env.observation_space, 22 | action_space=env.action_space, 23 | env_spec=env.spec 24 | ) 25 | baseline = MLPBaseline( 26 | observation_space=env.observation_space, 27 | action_space=env.action_space, 28 | env_spec=env.spec 29 | ) 30 | trpo( 31 | env=env, 32 | env_maker=env_maker, 33 | n_envs=16, 34 | policy=policy, 35 | baseline=baseline, 36 | batch_size=2000, 37 | n_iters=100, 38 | snapshot_saver=SnapshotSaver(log_dir) 39 | ) 40 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_trpo_half_cheetah.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import chainer 3 | 4 | from algs import trpo 5 | from env_makers import EnvMaker 6 | from models import GaussianMLPPolicy, MLPBaseline 7 | from utils import SnapshotSaver 8 | import numpy as np 9 | import os 10 | import logger 11 | 12 | log_dir = "data/local/trpo-half-cheetah" 13 | 14 | np.random.seed(42) 15 | 16 | # Clean up existing logs 17 | os.system("rm -rf {}".format(log_dir)) 18 | 19 | with logger.session(log_dir): 20 | env_maker = EnvMaker('RoboschoolHalfCheetah-v1') 21 | env = env_maker.make() 22 | policy = GaussianMLPPolicy( 23 | observation_space=env.observation_space, 24 | action_space=env.action_space, 25 | env_spec=env.spec, 26 | hidden_sizes=(256, 64), 27 | hidden_nonlinearity=chainer.functions.tanh, 28 | ) 29 | baseline = MLPBaseline( 30 | observation_space=env.observation_space, 31 | action_space=env.action_space, 32 | env_spec=env.spec, 33 | hidden_sizes=(256, 64), 34 | hidden_nonlinearity=chainer.functions.tanh, 35 | ) 36 | trpo( 37 | env=env, 38 | env_maker=env_maker, 39 | n_envs=16, 40 | policy=policy, 41 | baseline=baseline, 42 | batch_size=5000, 43 | n_iters=5000, 44 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 45 | ) 46 | -------------------------------------------------------------------------------- /labs/lab4/experiments/run_trpo_pendulum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import chainer 3 | 4 | from algs import trpo 5 | from env_makers import EnvMaker 6 | from models import GaussianMLPPolicy, MLPBaseline 7 | from utils import SnapshotSaver 8 | import numpy as np 9 | import os 10 | import logger 11 | 12 | log_dir = "data/local/trpo-pendulum" 13 | 14 | np.random.seed(42) 15 | 16 | # Clean up existing logs 17 | os.system("rm -rf {}".format(log_dir)) 18 | 19 | with logger.session(log_dir): 20 | env_maker = EnvMaker('Pendulum-v0') 21 | env = env_maker.make() 22 | policy = GaussianMLPPolicy( 23 | observation_space=env.observation_space, 24 | action_space=env.action_space, 25 | env_spec=env.spec, 26 | hidden_sizes=(64, 64), 27 | hidden_nonlinearity=chainer.functions.tanh, 28 | ) 29 | baseline = MLPBaseline( 30 | observation_space=env.observation_space, 31 | action_space=env.action_space, 32 | env_spec=env.spec, 33 | hidden_sizes=(64, 64), 34 | hidden_nonlinearity=chainer.functions.tanh, 35 | ) 36 | trpo( 37 | env=env, 38 | env_maker=env_maker, 39 | n_envs=16, 40 | policy=policy, 41 | baseline=baseline, 42 | batch_size=10000, 43 | n_iters=100, 44 | snapshot_saver=SnapshotSaver(log_dir), 45 | ) 46 | -------------------------------------------------------------------------------- /labs/lab4/findport.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Usage: findport.py 3000 100 4 | # 5 | from __future__ import print_function 6 | import socket 7 | from contextlib import closing 8 | import sys 9 | 10 | if len(sys.argv) != 3: 11 | print("Usage: {} <base> <increment>".format(sys.argv[0])) 12 | sys.exit(1) 13 | 14 | base = int(sys.argv[1]) 15 | increment = int(sys.argv[2]) 16 | 17 | 18 | def find_free_port(): 19 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 20 | for port in range(base, 65536, increment): 21 | try: 22 | s.bind(('', port)) 23 | return s.getsockname()[1] 24 | except socket.error: 25 | continue 26 | 27 | 28 | print(find_free_port()) 29 | -------------------------------------------------------------------------------- /labs/lab4/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir -p ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /labs/lab4/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | See README.md for a description of the logging API. 4 | 5 | OFF state corresponds to having Logger.CURRENT == Logger.DEFAULT 6 | ON state is otherwise 7 | 8 | """ 9 | import datetime 10 | from collections import OrderedDict 11 | import os 12 | import sys 13 | import shutil 14 | import os.path as osp 15 | import json 16 | 17 | import dateutil.tz 18 | 19 | LOG_OUTPUT_FORMATS = ['stdout', 'log', 'json'] 20 | 21 | DEBUG = 10 22 | INFO = 20 23 | WARN = 30 24 | ERROR = 40 25 | 26 | DISABLED = 50 27 | 28 | 29 | class OutputFormat(object): 30 | def writekvs(self, kvs): 31 | """ 32 | Write key-value pairs 33 | """ 34 | raise NotImplementedError 35 | 36 | def writeseq(self, args): 37 | """ 38 | Write a sequence of other data (e.g. a logging message) 39 | """ 40 | pass 41 | 42 | def close(self): 43 | return 44 | 45 | 46 | class HumanOutputFormat(OutputFormat): 47 | def __init__(self, file): 48 | self.file = file 49 | 50 | def writekvs(self, kvs): 51 | # Create strings for printing 52 | key2str = OrderedDict() 53 | for (key, val) in kvs.items(): 54 | valstr = '%-8.5g' % (val,) if hasattr(val, '__float__') else val 55 | key2str[self._truncate(key)] = self._truncate(valstr) 56 | 57 | # Find max widths 58 | keywidth = max(map(len, key2str.keys())) 59 | valwidth = max(map(len, key2str.values())) 60 | 61 | # Write out the data 62 | dashes = '-' * (keywidth + valwidth + 7) 63 | lines = [dashes] 64 | for (key, val) in key2str.items(): 65 | lines.append('| %s%s | %s%s |' % ( 66 | key, 67 | ' ' * (keywidth - len(key)), 68 | val, 69 | ' ' * (valwidth - len(val)), 70 | )) 71 | lines.append(dashes) 72 | self.file.write('\n'.join(lines) + '\n') 73 | 74 | # Flush the output to the file 75 | self.file.flush() 76 | 77 | def _truncate(self, s): 78 | return s[:20] + '...' 
if len(s) > 23 else s 79 | 80 | def writeseq(self, args): 81 | for arg in args: 82 | self.file.write(arg) 83 | self.file.write('\n') 84 | self.file.flush() 85 | 86 | 87 | class JSONOutputFormat(OutputFormat): 88 | def __init__(self, file): 89 | self.file = file 90 | 91 | def writekvs(self, kvs): 92 | for k, v in kvs.items(): 93 | if hasattr(v, 'dtype'): 94 | v = v.tolist() 95 | kvs[k] = float(v) 96 | self.file.write(json.dumps(kvs) + '\n') 97 | self.file.flush() 98 | 99 | 100 | def make_output_format(format, ev_dir): 101 | os.makedirs(ev_dir, exist_ok=True) 102 | if format == 'stdout': 103 | return HumanOutputFormat(sys.stdout) 104 | elif format == 'log': 105 | log_file = open(osp.join(ev_dir, 'log.txt'), 'at') 106 | return HumanOutputFormat(log_file) 107 | elif format == 'json': 108 | json_file = open(osp.join(ev_dir, 'progress.json'), 'at') 109 | return JSONOutputFormat(json_file) 110 | else: 111 | raise ValueError('Unknown format specified: %s' % (format,)) 112 | 113 | 114 | # ================================================================ 115 | # API 116 | # ================================================================ 117 | 118 | 119 | def logkv(key, val): 120 | """ 121 | Log a value of some diagnostic 122 | Call this once for each diagnostic quantity, each iteration 123 | """ 124 | Logger.CURRENT.logkv(key, val) 125 | 126 | 127 | def dumpkvs(): 128 | """ 129 | Write all of the diagnostics from the current iteration 130 | 131 | 132 | 133 | """ 134 | Logger.CURRENT.dumpkvs() 135 | 136 | 137 | # for backwards compatibility 138 | record_tabular = logkv 139 | dump_tabular = dumpkvs 140 | 141 | 142 | def log(*args, level=INFO): 143 | """ 144 | Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). 145 | """ 146 | Logger.CURRENT.log(*args, level=level) 147 | 148 | 149 | def debug(*args): 150 | log(*args, level=DEBUG) 151 | 152 | 153 | def info(*args): 154 | log(*args, level=INFO) 155 | 156 | 157 | def warn(*args): 158 | log(*args, level=WARN) 159 | 160 | 161 | def error(*args): 162 | log(*args, level=ERROR) 163 | 164 | 165 | def set_level(level): 166 | """ 167 | Set logging threshold on current logger. 168 | """ 169 | Logger.CURRENT.set_level(level) 170 | 171 | 172 | def get_level(): 173 | """ 174 | Get logging threshold on current logger. 175 | """ 176 | return Logger.CURRENT.level 177 | 178 | 179 | def get_dir(): 180 | """ 181 | Get directory that log files are being written to. 182 | Will be None if there is no output directory (i.e., if no logging session has been configured) 183 | """ 184 | return Logger.CURRENT.get_dir() 185 | 186 | 187 | def get_expt_dir(): 188 | sys.stderr.write( 189 | "get_expt_dir() is deprecated. Switch to get_dir() [%s]\n" % (get_dir(),)) 190 | return get_dir() 191 | 192 | 193 | # ================================================================ 194 | # Backend 195 | # ================================================================ 196 | 197 | 198 | class Logger(object): 199 | # A logger with no output files. 
(See right below class definition) 200 | DEFAULT = None 201 | # So that you can still log to the terminal without setting up any output files 202 | CURRENT = None # Current logger being used by the free functions above 203 | 204 | def __init__(self, dir, output_formats): 205 | self.name2val = OrderedDict() # values this iteration 206 | self.level = INFO 207 | self.dir = dir 208 | self.output_formats = output_formats 209 | 210 | # Logging API, forwarded 211 | # ---------------------------------------- 212 | def logkv(self, key, val): 213 | self.name2val[key] = val 214 | 215 | def dumpkvs(self): 216 | for fmt in self.output_formats: 217 | fmt.writekvs(self.name2val) 218 | self.name2val.clear() 219 | 220 | def log(self, *args, level=INFO): 221 | now = datetime.datetime.now(dateutil.tz.tzlocal()) 222 | timestamp = now.strftime('[%Y-%m-%d %H:%M:%S.%f %Z] ') 223 | if self.level <= level: 224 | self._do_log((timestamp,) + args) 225 | 226 | # Configuration 227 | # ---------------------------------------- 228 | def set_level(self, level): 229 | self.level = level 230 | 231 | def get_dir(self): 232 | return self.dir 233 | 234 | def close(self): 235 | for fmt in self.output_formats: 236 | fmt.close() 237 | 238 | # Misc 239 | # ---------------------------------------- 240 | def _do_log(self, args): 241 | for fmt in self.output_formats: 242 | fmt.writeseq(args) 243 | 244 | 245 | # ================================================================ 246 | 247 | Logger.DEFAULT = Logger( 248 | output_formats=[HumanOutputFormat(sys.stdout)], dir=None) 249 | Logger.CURRENT = Logger.DEFAULT 250 | 251 | 252 | class session(object): 253 | """ 254 | Context manager that sets up the loggers for an experiment. 255 | """ 256 | 257 | CURRENT = None # Set to a LoggerContext object using enter/exit or context manager 258 | 259 | def __init__(self, dir, format_strs=None): 260 | self.dir = dir 261 | if format_strs is None: 262 | format_strs = LOG_OUTPUT_FORMATS 263 | output_formats = [make_output_format(f, dir) for f in format_strs] 264 | Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) 265 | 266 | def __enter__(self): 267 | os.makedirs(self.evaluation_dir(), exist_ok=True) 268 | output_formats = [make_output_format( 269 | f, self.evaluation_dir()) for f in LOG_OUTPUT_FORMATS] 270 | Logger.CURRENT = Logger(dir=self.dir, output_formats=output_formats) 271 | 272 | def __exit__(self, *args): 273 | Logger.CURRENT.close() 274 | Logger.CURRENT = Logger.DEFAULT 275 | 276 | def evaluation_dir(self): 277 | return self.dir 278 | 279 | 280 | # ================================================================ 281 | 282 | 283 | def _demo(): 284 | info("hi") 285 | debug("shouldn't appear") 286 | set_level(DEBUG) 287 | debug("should appear") 288 | dir = "/tmp/testlogging" 289 | if os.path.exists(dir): 290 | shutil.rmtree(dir) 291 | with session(dir=dir): 292 | record_tabular("a", 3) 293 | record_tabular("b", 2.5) 294 | dump_tabular() 295 | record_tabular("b", -2.5) 296 | record_tabular("a", 5.5) 297 | dump_tabular() 298 | info("^^^ should see a = 5.5") 299 | 300 | record_tabular("b", -2.5) 301 | dump_tabular() 302 | 303 | record_tabular("a", "longasslongasslongasslongasslongasslongassvalue") 304 | dump_tabular() 305 | 306 | 307 | if __name__ == "__main__": 308 | _demo() 309 | -------------------------------------------------------------------------------- /labs/lab4/pg.py: -------------------------------------------------------------------------------- 1 | from alg_utils import * 2 | from simplepg.simple_utils import 
test_once, nprs 3 | import tests.pg_tests 4 | 5 | 6 | def pg(env, env_maker, policy, baseline, n_envs=mp.cpu_count(), last_iter=-1, n_iters=100, batch_size=1000, 7 | optimizer=chainer.optimizers.Adam(), discount=0.99, gae_lambda=0.97, snapshot_saver=None): 8 | """ 9 | This method implements the policy gradient algorithm. 10 | :param env: An environment instance, which should have the same class as what env_maker.make() returns. 11 | :param env_maker: An object such that calling env_maker.make() will generate a new environment. 12 | :param policy: A stochastic policy which we will be optimizing. 13 | :param baseline: A baseline used for variance reduction and estimating future returns for unfinished trajectories. 14 | :param n_envs: Number of environments running simultaneously. 15 | :param last_iter: The index of the last iteration. This is normally -1 when starting afresh, but may be different when 16 | loaded from a snapshot. 17 | :param n_iters: The total number of iterations to run. 18 | :param batch_size: The number of samples used per iteration. 19 | :param optimizer: A Chainer optimizer instance. By default we use the Adam algorithm with learning rate 1e-3. 20 | :param discount: Discount factor. 21 | :param gae_lambda: Lambda parameter used for generalized advantage estimation. 22 | :param snapshot_saver: An object for saving snapshots. 23 | """ 24 | 25 | if getattr(optimizer, 'target', None) is not policy: 26 | optimizer.setup(policy) 27 | 28 | logger.info("Starting env pool") 29 | with EnvPool(env_maker, n_envs=n_envs) as env_pool: 30 | for iter in range(last_iter + 1, n_iters): 31 | logger.info("Starting iteration {}".format(iter)) 32 | logger.logkv('Iteration', iter) 33 | 34 | logger.info("Start collecting samples") 35 | trajs = parallel_collect_samples(env_pool, policy, batch_size) 36 | 37 | logger.info("Computing input variables for policy optimization") 38 | all_obs, all_acts, all_advs, _ = compute_pg_vars( 39 | trajs, policy, baseline, discount, gae_lambda 40 | ) 41 | 42 | # Begin policy update 43 | 44 | # Now, you need to implement the computation of the policy gradient 45 | # The policy gradient is given by -1/T \sum_t \nabla_\theta(log(p_\theta(a_t|s_t))) * A_t 46 | # Note the negative sign in front, since optimizers most often minimize a loss rather than maximize an objective 47 | # This is the same as \nabla_\theta(-1/T \sum_t log(p_\theta(a_t|s_t)) * A_t) = \nabla_\theta(L), where L is the surrogate loss term 48 | 49 | logger.info("Computing policy gradient") 50 | 51 | # Methods that may be useful: 52 | # - `dists.logli(actions)' returns the log probability of the actions under the distribution `dists'. 53 | # This method returns a chainer variable. 
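# A tiny worked example of the surrogate loss above (an illustrative sketch, not part of the lab API):
# with per-step log-probabilities logp = [-0.5, -1.2] and advantages A = [1.0, -2.0],
#   L = -mean(logp * A) = -mean([-0.5, 2.4]) = -0.95,
# and minimizing L with respect to the policy parameters follows the policy gradient.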
54 | 55 | dists = policy.compute_dists(all_obs) 56 | 57 | def compute_surr_loss(dists, all_acts, all_advs): 58 | """ 59 | :param dists: An instance of subclass of Distribution 60 | :param all_acts: A chainer variable, which should be a matrix of size N * |A| 61 | :param all_advs: A chainer variable, which should be a vector of size N 62 | :return: A chainer variable, which should be a scalar 63 | """ 64 | return -F.mean(dists.logli(all_acts)*all_advs) 65 | 66 | test_once(compute_surr_loss) 67 | 68 | surr_loss = compute_surr_loss(dists, all_acts, all_advs) 69 | 70 | # reset gradients stored in the policy parameters 71 | policy.cleargrads() 72 | surr_loss.backward() 73 | 74 | # apply the computed gradient 75 | optimizer.update() 76 | 77 | # Update baseline 78 | logger.info("Updating baseline") 79 | baseline.update(trajs) 80 | 81 | # log statistics 82 | logger.info("Computing logging information") 83 | logger.logkv('SurrLoss', surr_loss.data) 84 | log_action_distribution_statistics(dists) 85 | log_reward_statistics(env) 86 | log_baseline_statistics(trajs) 87 | logger.dumpkvs() 88 | 89 | if snapshot_saver is not None: 90 | logger.info("Saving snapshot") 91 | snapshot_saver.save_state( 92 | iter, 93 | dict( 94 | alg=pg, 95 | alg_state=dict( 96 | env_maker=env_maker, 97 | policy=policy, 98 | baseline=baseline, 99 | n_envs=n_envs, 100 | last_iter=iter, 101 | n_iters=n_iters, 102 | batch_size=batch_size, 103 | optimizer=optimizer, 104 | discount=discount, 105 | gae_lambda=gae_lambda 106 | ) 107 | ) 108 | ) 109 | -------------------------------------------------------------------------------- /labs/lab4/pong_warm_start.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/pong_warm_start.pkl -------------------------------------------------------------------------------- /labs/lab4/scripts/resume_training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import click 4 | import logger 5 | 6 | 7 | @click.command() 8 | @click.argument("dir") # , "Directory which contains snapshot files") 9 | @click.option("--interval", help="Interval between saving snapshots", type=int, default=10) 10 | def main(dir, interval): 11 | with logger.session(dir): 12 | saver = SnapshotSaver(dir, interval=interval) 13 | state = saver.get_state() 14 | alg_state = state['alg_state'] 15 | env = alg_state['env_maker'].make() 16 | alg = state['alg'] 17 | alg(env=env, snapshot_saver=saver, **alg_state) 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | -------------------------------------------------------------------------------- /labs/lab4/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. 
You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 43 | -------------------------------------------------------------------------------- /labs/lab4/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import click 4 | import time 5 | import os 6 | 7 | 8 | @click.command() 9 | @click.argument("dir") 10 | def main(dir): 11 | env = None 12 | while True: 13 | saver = SnapshotSaver(dir) 14 | state = saver.get_state() 15 | if state is None: 16 | time.sleep(1) 17 | continue 18 | alg_state = state['alg_state'] 19 | if env is None: 20 | env = alg_state['env_maker'].make() 21 | policy = alg_state['policy'] 22 | ob = env.reset() 23 | done = False 24 | while not done: 25 | action, _ = policy.get_action(ob) 26 | ob, _, done, _ = env.step(action) 27 | env.render() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /labs/lab4/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import roboschool 6 | import gym 7 | import chainer 8 | env = gym.make('CartPole-v0') 9 | env.reset() 10 | env.step(env.action_space.sample()) 11 | env = gym.make('RoboschoolHalfCheetah-v1') 12 | env.reset() 13 | env.step(env.action_space.sample()) 14 | print("Your environment has been successfully set up!") 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /labs/lab4/simplepg/__pycache__/point_env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/simplepg/__pycache__/point_env.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/simplepg/__pycache__/simple_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/simplepg/__pycache__/simple_utils.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | from gym import Env 2 | from gym.envs.registration 
import register 3 | from gym.utils import seeding 4 | from gym import spaces 5 | from gym.envs.classic_control.cartpole import CartPoleEnv 6 | import numpy as np 7 | 8 | 9 | class PointEnv(Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 50 13 | } 14 | 15 | def __init__(self): 16 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 17 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 18 | 19 | self._seed() 20 | self.viewer = None 21 | self.state = None 22 | 23 | def _seed(self, seed=None): 24 | self.np_random, seed = seeding.np_random(seed) 25 | return [seed] 26 | 27 | def _step(self, action): 28 | action = np.clip(action, -0.025, 0.025) 29 | self.state = np.clip(self.state + action, -1, 1) 30 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 31 | 32 | def _reset(self): 33 | while True: 34 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 35 | # Sample states that are far away 36 | if np.linalg.norm(self.state) > 0.9: 37 | break 38 | return np.array(self.state) 39 | 40 | # def _render(self, mode='human', close=False): 41 | # pass 42 | 43 | def _render(self, mode='human', close=False): 44 | if close: 45 | if self.viewer is not None: 46 | self.viewer.close() 47 | self.viewer = None 48 | return 49 | 50 | screen_width = 800 51 | screen_height = 800 52 | 53 | if self.viewer is None: 54 | from gym.envs.classic_control import rendering 55 | self.viewer = rendering.Viewer(screen_width, screen_height) 56 | 57 | agent = rendering.make_circle( 58 | min(screen_height, screen_width) * 0.03) 59 | origin = rendering.make_circle( 60 | min(screen_height, screen_width) * 0.03) 61 | trans = rendering.Transform(translation=(0, 0)) 62 | agent.add_attr(trans) 63 | self.trans = trans 64 | agent.set_color(1, 0, 0) 65 | origin.set_color(0, 0, 0) 66 | origin.add_attr(rendering.Transform( 67 | translation=(screen_width // 2, screen_height // 2))) 68 | self.viewer.add_geom(agent) 69 | self.viewer.add_geom(origin) 70 | 71 | # self.trans.set_translation(0, 0) 72 | self.trans.set_translation( 73 | (self.state[0] + 1) / 2 * screen_width, 74 | (self.state[1] + 1) / 2 * screen_height, 75 | ) 76 | 77 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 78 | 79 | 80 | register( 81 | 'Point-v0', 82 | entry_point='simplepg.point_env:PointEnv', 83 | timestep_limit=40, 84 | ) 85 | -------------------------------------------------------------------------------- /labs/lab4/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import click 3 | import numpy as np 4 | import gym 5 | 6 | from simplepg.simple_utils import include_bias, weighted_sample 7 | 8 | 9 | def point_get_action(theta, ob, rng=np.random): 10 | ob_1 = include_bias(ob) 11 | mean = theta.dot(ob_1) 12 | return rng.normal(loc=mean, scale=1.) 
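# Shape check (informal): for Point-v0 below, obs_dim == 2, so include_bias(ob) has shape (3,),
# theta has shape (action_dim, obs_dim + 1) == (2, 3), and theta.dot(ob_1) is the 2-D mean of the
# unit-variance Gaussian from which the action is sampled.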
13 | 14 | 15 | def cartpole_get_action(theta, ob, rng=np.random): 16 | ob_1 = include_bias(ob) 17 | logits = ob_1.dot(theta.T) 18 | return weighted_sample(logits, rng=rng) 19 | 20 | 21 | @click.command() 22 | @click.argument("env_id", type=str, default="Point-v0") 23 | def main(env_id): 24 | # Register the environment 25 | rng = np.random.RandomState(42) 26 | 27 | if env_id == 'CartPole-v0': 28 | env = gym.make('CartPole-v0') 29 | get_action = cartpole_get_action 30 | obs_dim = env.observation_space.shape[0] 31 | action_dim = env.action_space.n 32 | elif env_id == 'Point-v0': 33 | from simplepg import point_env 34 | env = gym.make('Point-v0') 35 | get_action = point_get_action 36 | obs_dim = env.observation_space.shape[0] 37 | action_dim = env.action_space.shape[0] 38 | else: 39 | raise ValueError( 40 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 41 | 42 | env.seed(42) 43 | 44 | # Initialize parameters 45 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 46 | 47 | while True: 48 | ob = env.reset() 49 | done = False 50 | # Only render the first trajectory 51 | # Collect a new trajectory 52 | rewards = [] 53 | while not done: 54 | action = get_action(theta, ob, rng=rng) 55 | next_ob, rew, done, _ = env.step(action) 56 | ob = next_ob 57 | env.render() 58 | rewards.append(rew) 59 | 60 | print("Episode reward: %.2f" % np.sum(rewards)) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /labs/lab4/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.special 3 | import chainer 4 | 5 | 6 | # Compute gradient approximately using finite difference 7 | def numerical_grad(f, x, eps=1e-8): 8 | grad = np.zeros_like(x) 9 | for i in range(len(x)): 10 | xplus = np.array(x) 11 | xplus[i] += eps 12 | fplus = f(xplus) 13 | xminus = np.array(x) 14 | xminus[i] -= eps 15 | fminus = f(xminus) 16 | grad[i] = (fplus - fminus) / (2 * eps) 17 | return grad 18 | 19 | 20 | def gradient_check(f, g, x): 21 | # Test the implementation of g(x) = df/dx 22 | # Perform numerical differentiation and test it 23 | g_num = numerical_grad(f, x) 24 | g_test = g(x) 25 | try: 26 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 27 | print("Gradient check passed!") 28 | except AssertionError as e: 29 | print(e) 30 | print("Error: Gradient check didn't pass!") 31 | exit() 32 | 33 | 34 | def log_softmax(logits): 35 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 36 | 37 | 38 | def softmax(logits): 39 | x = logits 40 | x = x - np.max(x, axis=-1, keepdims=True) 41 | x = np.exp(x) 42 | return x / np.sum(x, axis=-1, keepdims=True) 43 | 44 | 45 | def weighted_sample(logits, rng=np.random): 46 | weights = softmax(logits) 47 | return min( 48 | int(np.sum(rng.uniform() > np.cumsum(weights))), 49 | len(weights) - 1 50 | ) 51 | 52 | 53 | def include_bias(x): 54 | # Add a constant term (1.0) to each entry in x 55 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 56 | 57 | 58 | _tested = set() 59 | _tests = dict() 60 | 61 | nprs = np.random.RandomState 62 | 63 | 64 | def register_test(fn_name, kwargs, desired_output=None): 65 | assert fn_name not in _tests 66 | _tests[fn_name] = (kwargs, desired_output) 67 | 68 | 69 | def assert_allclose(a, b): 70 | if isinstance(a, (np.ndarray, float, int)): 71 | np.testing.assert_allclose(a, b, rtol=1e-5) 72 | elif isinstance(a, (tuple, list)): 73 | 
assert isinstance(b, (tuple, list)) 74 | assert len(a) == len(b) 75 | for a_i, b_i in zip(a, b): 76 | assert_allclose(a_i, b_i) 77 | elif isinstance(a, chainer.Variable): 78 | assert isinstance(b, chainer.Variable) 79 | assert_allclose(a.data, b.data) 80 | else: 81 | raise NotImplementedError 82 | 83 | 84 | def test_once(fn): 85 | module = fn.__module__ 86 | name = fn.__name__ 87 | key = module + "." + name 88 | if key in _tested: 89 | return 90 | assert key in _tests, "Test for %s not found!" % key 91 | kwargs, desired_output = _tests[key] 92 | _tested.add(key) 93 | 94 | if callable(kwargs): 95 | kwargs = kwargs() 96 | 97 | if callable(desired_output): 98 | desired_output = desired_output() 99 | 100 | if desired_output is None: 101 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 102 | exit() 103 | else: 104 | try: 105 | output = fn(**kwargs) 106 | assert_allclose(desired_output, output) 107 | print("Test for %s passed!" % key) 108 | except AssertionError as e: 109 | print(e) 110 | print("Error: test for %s didn't pass!" % key) 111 | exit() 112 | -------------------------------------------------------------------------------- /labs/lab4/tests/__pycache__/a2c_tests.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/tests/__pycache__/a2c_tests.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/tests/__pycache__/pg_tests.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/tests/__pycache__/pg_tests.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/tests/__pycache__/simplepg_tests.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/tests/__pycache__/simplepg_tests.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/tests/__pycache__/trpo_tests.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/tests/__pycache__/trpo_tests.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/tests/a2c_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | from chainer import Variable 4 | 5 | register_test( 6 | "a2c.compute_returns_advantages", 7 | kwargs=lambda: dict( 8 | rewards=nprs(0).uniform(size=(5, 2)), 9 | dones=nprs(1).choice([True, False], size=(5, 2)), 10 | values=nprs(2).uniform(size=(5, 2)), 11 | next_values=nprs(3).uniform(size=(2,)), 12 | discount=0.99, 13 | ), 14 | desired_output=lambda: ( 15 | np.array([[1.14554925, 1.25462372], 16 | [0.60276338, 0.54488318], 17 | [2.33579066, 1.90456042], 18 | [1.93145037, 1.2713801], 19 | [1.50895268, 0.38344152]]), 20 | np.array([[0.70955434, 1.22869749], 21 | [0.0531009, 0.10956079], 22 | [1.91542286, 1.5742256], 23 | [1.72680173, 0.65210914], 24 | [1.20929801, 0.11661424]]) 25 | ) 26 | ) 27 | 28 | 
register_test( 29 | "a2c.compute_total_loss", 30 | kwargs=lambda: dict( 31 | logli=Variable(nprs(0).uniform(size=(10,)).astype(np.float32)), 32 | all_advs=Variable(nprs(1).uniform(size=(10,)).astype(np.float32)), 33 | ent_coeff=nprs(2).uniform(), 34 | ent=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 35 | vf_loss_coeff=nprs(4).uniform(), 36 | all_returns=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 37 | all_values=Variable(nprs(6).uniform(size=(10,)).astype(np.float32)), 38 | ), 39 | desired_output=lambda: ( 40 | Variable(np.array(-0.4047563076019287, dtype=np.float32)), 41 | Variable(np.array(0.22883716225624084, dtype=np.float32)), 42 | Variable(np.array(-0.1834639459848404, dtype=np.float32)) 43 | ) 44 | ) 45 | -------------------------------------------------------------------------------- /labs/lab4/tests/pg_tests.py: -------------------------------------------------------------------------------- 1 | from chainer import Variable 2 | 3 | from simplepg.simple_utils import register_test, nprs 4 | from utils import Gaussian 5 | import numpy as np 6 | 7 | register_test( 8 | "pg.compute_surr_loss", 9 | kwargs=lambda: dict( 10 | dists=Gaussian( 11 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 12 | log_stds=Variable(nprs(1).uniform( 13 | size=(10, 3)).astype(np.float32)), 14 | ), 15 | all_acts=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 16 | all_advs=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 17 | ), 18 | desired_output=lambda: Variable( 19 | np.array(1.9201269149780273, dtype=np.float32)) 20 | ) 21 | -------------------------------------------------------------------------------- /labs/lab4/tests/simplepg_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | 4 | register_test( 5 | "__main__.compute_update", 6 | kwargs=lambda: dict( 7 | discount=0.99, 8 | R_tplus1=1.0, 9 | theta=nprs(0).uniform(size=(2, 2)), 10 | s_t=nprs(1).uniform(size=(1,)), 11 | a_t=nprs(2).choice(2), 12 | r_t=nprs(3).uniform(), 13 | b_t=nprs(4).uniform(), 14 | get_grad_logp_action=lambda theta, *_: theta * 2 15 | ), 16 | desired_output=lambda: ( 17 | 1.5407979025745755, 18 | np.array([[0.62978332, 0.82070564], [0.69169275, 0.62527314]]) 19 | ) 20 | ) 21 | 22 | register_test( 23 | "__main__.compute_baselines", 24 | kwargs=lambda: dict( 25 | all_returns=[ 26 | nprs(0).uniform(size=(10,)), 27 | nprs(1).uniform(size=(20,)), 28 | [], 29 | ], 30 | ), 31 | desired_output=lambda: np.array([0.61576628, 0.36728075, 0.]) 32 | ) 33 | 34 | register_test( 35 | "__main__.compute_fisher_matrix", 36 | kwargs=lambda: dict( 37 | theta=nprs(1).uniform(size=(2, 2)), 38 | get_grad_logp_action=lambda theta, ob, action: np.exp( 39 | theta) * np.linalg.norm(action), 40 | all_observations=list(nprs(2).uniform(size=(5, 1))), 41 | all_actions=list(nprs(3).choice(2, size=(5,))), 42 | ), 43 | desired_output=lambda: np.array([[0.92104469, 1.24739299, 0.60704379, 0.82124306], 44 | [1.24739299, 1.68937435, 45 | 0.82213401, 1.11222925], 46 | [0.60704379, 0.82213401, 47 | 0.40009151, 0.54126635], 48 | [0.82124306, 1.11222925, 0.54126635, 0.73225564]]) 49 | ) 50 | 51 | register_test( 52 | "__main__.compute_natural_gradient", 53 | kwargs=lambda: dict( 54 | F=nprs(0).uniform(size=(4, 4)), 55 | grad=nprs(1).uniform(size=(2, 2)), 56 | reg=1e-3, 57 | ), 58 | desired_output=lambda: np.array( 59 | [[-0.44691565, 0.5477328], [-0.20366472, 0.72267091]]) 60 | ) 61 | 62 
| register_test( 63 | "__main__.compute_step_size", 64 | kwargs=lambda: dict( 65 | F=nprs(0).uniform(size=(2, 2)), 66 | natural_grad=nprs(1).uniform(size=(1, 2)), 67 | natural_step_size=1e-2, 68 | ), 69 | desired_output=lambda: 0.1607407366467048, 70 | ) 71 | -------------------------------------------------------------------------------- /labs/lab4/tests/trpo_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | from chainer import Variable 4 | 5 | from utils import Gaussian 6 | 7 | register_test( 8 | "trpo.compute_surr_loss", 9 | kwargs=lambda: dict( 10 | old_dists=Gaussian( 11 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 12 | log_stds=Variable(nprs(1).uniform( 13 | size=(10, 3)).astype(np.float32)), 14 | ), 15 | new_dists=Gaussian( 16 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 17 | log_stds=Variable(nprs(3).uniform( 18 | size=(10, 3)).astype(np.float32)), 19 | ), 20 | all_acts=Variable(nprs(4).uniform(size=(10, 3)).astype(np.float32)), 21 | all_advs=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 22 | ), 23 | desired_output=lambda: Variable( 24 | np.array(-0.5629823207855225, dtype=np.float32)) 25 | ) 26 | 27 | register_test( 28 | "trpo.compute_kl", 29 | kwargs=lambda: dict( 30 | old_dists=Gaussian( 31 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 32 | log_stds=Variable(nprs(1).uniform( 33 | size=(10, 3)).astype(np.float32)), 34 | ), 35 | new_dists=Gaussian( 36 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 37 | log_stds=Variable(nprs(3).uniform( 38 | size=(10, 3)).astype(np.float32)), 39 | ), 40 | ), 41 | desired_output=lambda: Variable( 42 | np.array(0.5306503176689148, dtype=np.float32)) 43 | ) 44 | -------------------------------------------------------------------------------- /labs/lab4/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/viskit/__init__.py -------------------------------------------------------------------------------- /labs/lab4/viskit/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/viskit/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/viskit/__pycache__/core.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab4/viskit/__pycache__/core.cpython-35.pyc -------------------------------------------------------------------------------- /labs/lab4/viskit/core.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import json 4 | import os 5 | 6 | import numpy as np 7 | 8 | 9 | # from sandbox.rocky.utils.py_utils import AttrDict 10 | 11 | class AttrDict(dict): 12 | def __init__(self, *args, **kwargs): 13 | super(AttrDict, self).__init__(*args, **kwargs) 14 | self.__dict__ = self 15 | 16 | 17 | def unique(l): 18 | return list(set(l)) 19 | 20 | 21 | def flatten(l): 22 | return [item for sublist in l for item in sublist] 23 | 24 | 25 
| def load_progress(progress_json_path, verbose=True):
26 |     if verbose:
27 |         print("Reading %s" % progress_json_path)
28 |     entries = dict()
29 |     rows = []
30 |     with open(progress_json_path, 'r') as f:
31 |         lines = f.read().split('\n')
32 |         for line in lines:
33 |             if len(line) > 0:
34 |                 row = json.loads(line)
35 |                 rows.append(row)
36 |     all_keys = set(k for row in rows for k in row.keys())
37 |     for k in all_keys:
38 |         if k not in entries:
39 |             entries[k] = []
40 |         for row in rows:
41 |             if k in row:
42 |                 v = row[k]
43 |                 try:
44 |                     entries[k].append(float(v))
45 |                 except (TypeError, ValueError):
46 |                     entries[k].append(np.nan)
47 |             else:
48 |                 entries[k].append(np.nan)
49 | 
63 |     entries = dict([(k, np.array(v)) for k, v in entries.items()])
64 |     return entries
65 | 
66 | 
67 | def flatten_dict(d):
68 |     flat_params = dict()
69 |     for k, v in d.items():
70 |         if isinstance(v, dict):
71 |             v = flatten_dict(v)
72 |             for subk, subv in v.items():
73 |                 flat_params[k + "." + subk] = subv
74 |         else:
75 |             flat_params[k] = v
76 |     return flat_params
77 | 
78 | 
79 | def load_params(params_json_path):
80 |     with open(params_json_path, 'r') as f:
81 |         data = json.loads(f.read())
82 |         if "args_data" in data:
83 |             del data["args_data"]
84 |         if "exp_name" not in data:
85 |             data["exp_name"] = params_json_path.split("/")[-2]
86 |     return data
87 | 
88 | 
89 | def lookup(d, keys):
90 |     if not isinstance(keys, list):
91 |         keys = keys.split(".")
92 |     for k in keys:
93 |         if hasattr(d, "__getitem__"):
94 |             if k in d:
95 |                 d = d[k]
96 |             else:
97 |                 return None
98 |         else:
99 |             return None
100 |     return d
101 | 
102 | 
103 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True):
104 |     if isinstance(exp_folder_paths, str):
105 |         exp_folder_paths = [exp_folder_paths]
106 |     exps = []
107 |     for exp_folder_path in exp_folder_paths:
108 |         exps += [x[0] for x in os.walk(exp_folder_path)]
109 |     if verbose:
110 |         print("finished walking exp folders")
111 |     exps_data = []
112 |     for exp in exps:
113 |         try:
114 |             exp_path = exp
115 |             variant_json_path = os.path.join(exp_path, "variant.json")
116 |             progress_json_path = os.path.join(exp_path, "progress.json")
117 |             progress = load_progress(progress_json_path, verbose=verbose)
118 |             try:
119 |                 params = load_params(variant_json_path)
120 |             except IOError:
121 |                 params = dict(exp_name="experiment")
122 |             exps_data.append(AttrDict(
123 |                 progress=progress, params=params, flat_params=flatten_dict(params)))
124 |         except IOError as e:
125 |             if verbose:
126 |                 print(e)
127 | 
128 |     # a dictionary of all keys and types of values
129 |     all_keys = dict()
130 |     for data in exps_data:
131 |         for key in data.flat_params.keys():
132 |             if key not in all_keys:
133 |                 all_keys[key] = type(data.flat_params[key])
134 | 
135 |     # if an experiment is missing some key, fill it in with a default value (None)
136 |     if not ignore_missing_keys:
137 |         default_values = dict()
138 |         for data in exps_data:
139 |             for key in sorted(all_keys.keys()):
140 |                 if key not in data.flat_params:
141 |                     if key not in default_values:
142 |                         default = None
143 |                         default_values[key] = default
144 |                     data.flat_params[key] = default_values[key]
145 | 
146 |     return exps_data
147 | 
148 | 
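# A minimal usage sketch for the loaders above (the folder path and the
# "AverageReturn" key are assumptions; use whatever your runs actually logged):
#
#     exps = load_exps_data("data/local/trpo-cartpole")
#     for exp in exps:
#         print(exp.params["exp_name"], exp.progress["AverageReturn"][-1])
#
# Each entry in exp.progress is a numpy array (one element per logged
# iteration), and exp.flat_params holds the dotted, flattened variant keys.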
149 | def smart_repr(x): 150 | if isinstance(x, tuple): 151 | if len(x) == 0: 152 | return "tuple()" 153 | elif len(x) == 1: 154 | return "(%s,)" % smart_repr(x[0]) 155 | else: 156 | return "(" + ",".join(map(smart_repr, x)) + ")" 157 | else: 158 | if hasattr(x, "__call__"): 159 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." + x.__name__) 160 | else: 161 | return repr(x) 162 | 163 | 164 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 165 | try: 166 | stringified_pairs = sorted( 167 | map( 168 | eval, 169 | unique( 170 | flatten( 171 | [ 172 | list( 173 | map( 174 | smart_repr, 175 | list(d.flat_params.items()) 176 | ) 177 | ) 178 | for d in exps_data 179 | ] 180 | ) 181 | ) 182 | ), 183 | key=lambda x: ( 184 | tuple("" if it is None else str(it) for it in x), 185 | ) 186 | ) 187 | except Exception as e: 188 | print(e) 189 | import ipdb 190 | ipdb.set_trace() 191 | proposals = [(k, [x[1] for x in v]) 192 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 193 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 194 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 195 | return filtered 196 | 197 | 198 | class Selector(object): 199 | def __init__(self, exps_data, filters=None, custom_filters=None): 200 | self._exps_data = exps_data 201 | if filters is None: 202 | self._filters = tuple() 203 | else: 204 | self._filters = tuple(filters) 205 | if custom_filters is None: 206 | self._custom_filters = [] 207 | else: 208 | self._custom_filters = custom_filters 209 | 210 | def where(self, k, v): 211 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 212 | 213 | def custom_filter(self, filter): 214 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 215 | 216 | def _check_exp(self, exp): 217 | # or exp.flat_params.get(k, None) is None 218 | return all( 219 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 220 | k not in exp.flat_params)) for k, v in self._filters) 221 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 222 | 223 | def extract(self): 224 | return list(filter(self._check_exp, self._exps_data)) 225 | 226 | def iextract(self): 227 | return filter(self._check_exp, self._exps_data) 228 | 229 | 230 | # Taken from plot.ly 231 | color_defaults = [ 232 | '#1f77b4', # muted blue 233 | '#ff7f0e', # safety orange 234 | '#2ca02c', # cooked asparagus green 235 | '#d62728', # brick red 236 | '#9467bd', # muted purple 237 | '#8c564b', # chestnut brown 238 | '#e377c2', # raspberry yogurt pink 239 | '#7f7f7f', # middle gray 240 | '#bcbd22', # curry yellow-green 241 | '#17becf' # blue-teal 242 | ] 243 | 244 | 245 | def hex_to_rgb(hex, opacity=1.0): 246 | if hex[0] == '#': 247 | hex = hex[1:] 248 | assert (len(hex) == 6) 249 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 250 | -------------------------------------------------------------------------------- /labs/lab5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab5.pdf -------------------------------------------------------------------------------- /labs/lab5/alg_utils.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | 3 | 4 | # ============================== 5 | # Shared utilities 6 | # 
==============================
7 | 
8 | def compute_cumulative_returns(rewards, baselines, discount):
9 |     # This method builds up the cumulative sum of discounted rewards for each time step:
10 |     # R[t] = sum_{t'>=t} γ^(t'-t) * r_t'
11 |     # Note that we use γ^(t'-t) instead of γ^t'. This gives us a biased gradient but lower variance
12 |     returns = []
13 |     # Use the last baseline prediction to back up
14 |     cum_return = baselines[-1]
15 |     for reward in rewards[::-1]:
16 |         cum_return = cum_return * discount + reward
17 |         returns.append(cum_return)
18 |     return returns[::-1]
19 | 
20 | 
21 | def compute_advantages(rewards, baselines, discount, gae_lambda):
22 |     # Given rewards r_t and baselines b(s_t), compute the generalized advantage estimate A_t
23 |     deltas = rewards + discount * baselines[1:] - baselines[:-1]
24 |     advs = []
25 |     cum_adv = 0
26 |     multiplier = discount * gae_lambda
27 |     for delta in deltas[::-1]:
28 |         cum_adv = cum_adv * multiplier + delta
29 |         advs.append(cum_adv)
30 |     return advs[::-1]
31 | 
32 | 
33 | def compute_pg_vars(trajs, policy, baseline, discount, gae_lambda):
34 |     """
35 |     Compute chainer variables needed for various policy gradient algorithms
36 |     """
37 |     for traj in trajs:
38 |         # Include the last observation here, in case the trajectory is not finished
39 |         baselines = baseline.predict(np.concatenate(
40 |             [traj["observations"], [traj["last_observation"]]]))
41 |         if traj['finished']:
42 |             # If already finished, the future cumulative reward starting from the final state is 0
43 |             baselines[-1] = 0.
44 |         # This is useful when fitting baselines. It uses the baseline prediction of the last state value to perform
45 |         # a Bellman backup if the trajectory is not finished.
46 |         traj['returns'] = compute_cumulative_returns(
47 |             traj['rewards'], baselines, discount)
48 |         traj['advantages'] = compute_advantages(
49 |             traj['rewards'], baselines, discount, gae_lambda)
50 |         traj['baselines'] = baselines[:-1]
51 | 
52 |     # First, we compute a flattened list of observations, actions, and advantages
53 |     all_obs = np.concatenate([traj['observations'] for traj in trajs], axis=0)
54 |     all_acts = np.concatenate([traj['actions'] for traj in trajs], axis=0)
55 |     all_advs = np.concatenate([traj['advantages'] for traj in trajs], axis=0)
56 |     all_dists = {
57 |         k: np.concatenate([traj['distributions'][k] for traj in trajs], axis=0)
58 |         for k in trajs[0]['distributions'].keys()
59 |     }
60 | 
61 |     # Normalizing the advantage values can make the algorithm more robust to reward scaling
62 |     all_advs = (all_advs - np.mean(all_advs)) / (np.std(all_advs) + 1e-8)
63 | 
64 |     # Form chainer variables
65 |     all_obs = Variable(all_obs)
66 |     all_acts = Variable(all_acts)
67 |     all_advs = Variable(all_advs.astype(np.float32, copy=False))
68 |     all_dists = policy.distribution.from_dict(
69 |         {k: Variable(v) for k, v in all_dists.items()})
70 | 
71 |     return all_obs, all_acts, all_advs, all_dists
72 | 
73 | 
74 | # ==============================
75 | # Helper methods for logging
76 | # ==============================
77 | 
78 | def log_reward_statistics(env):
79 |     # keep unwrapping until we get the Monitor wrapper
80 |     while not isinstance(env, gym.wrappers.Monitor):
81 |         if not isinstance(env, gym.Wrapper):
82 |             assert False, "Monitor wrapper not found"
83 |         env = env.env
84 | 
85 |     assert isinstance(env, gym.wrappers.Monitor)
86 |     all_stats = None
87 |     for _ in range(10):
88 |         try:
89 |             all_stats = gym.wrappers.monitoring.load_results(env.directory)
90 |         except FileNotFoundError:
91 |             time.sleep(1)
92 |             continue
93 |         if all_stats is not None:
94 |             episode_rewards = all_stats['episode_rewards']
95 |             episode_lengths = all_stats['episode_lengths']
96 | 
97 |             recent_episode_rewards = episode_rewards[-100:]
98 |             recent_episode_lengths = episode_lengths[-100:]
99 | 
100 |             if len(recent_episode_rewards) > 0:
101 |                 logger.logkv('AverageReturn', np.mean(recent_episode_rewards))
102 |                 logger.logkv('MinReturn', np.min(recent_episode_rewards))
103 |                 logger.logkv('MaxReturn', np.max(recent_episode_rewards))
104 |                 logger.logkv('StdReturn', np.std(recent_episode_rewards))
105 |                 logger.logkv('AverageEpisodeLength',
106 |                              np.mean(recent_episode_lengths))
107 |                 logger.logkv('MinEpisodeLength', np.min(recent_episode_lengths))
108 |                 logger.logkv('MaxEpisodeLength', np.max(recent_episode_lengths))
109 |                 logger.logkv('StdEpisodeLength', np.std(recent_episode_lengths))
110 | 
111 |             logger.logkv('TotalNEpisodes', len(episode_rewards))
112 |             logger.logkv('TotalNSamples', np.sum(episode_lengths))
113 | 
114 | 
115 | def log_baseline_statistics(trajs):
116 |     # Compute the explained variance of the baseline, defined as 1 - Var[returns - baselines] / Var[returns]
117 |     baselines = np.concatenate([traj['baselines'] for traj in trajs])
118 |     returns = np.concatenate([traj['returns'] for traj in trajs])
119 |     logger.logkv('ExplainedVariance',
120 |                  explained_variance_1d(baselines, returns))
121 | 
122 | 
123 | def log_action_distribution_statistics(dists):
124 |     with chainer.no_backprop_mode():
125 |         entropy = F.mean(dists.entropy()).data
126 |         logger.logkv('Entropy', entropy)
127 |         logger.logkv('Perplexity', np.exp(entropy))
128 |         if isinstance(dists, Gaussian):
129 |             logger.logkv('AveragePolicyStd', F.mean(
130 |                 F.exp(dists.log_stds)).data)
131 |             for idx in range(dists.log_stds.shape[-1]):
132 |                 logger.logkv('AveragePolicyStd[{}]'.format(
133 |                     idx), F.mean(F.exp(dists.log_stds[..., idx])).data)
134 |         elif isinstance(dists, Categorical):
135 |             probs = F.mean(F.softmax(dists.logits), axis=0).data
136 |             for idx in range(len(probs)):
137 |                 logger.logkv('AveragePolicyProb[{}]'.format(idx), probs[idx])
138 | 
--------------------------------------------------------------------------------
/labs/lab5/algs.py:
--------------------------------------------------------------------------------
1 | from pg import pg
2 | from trpo import trpo
3 | from a2c import a2c
4 | 
--------------------------------------------------------------------------------
/labs/lab5/cloudexec.yml.template:
--------------------------------------------------------------------------------
1 | ## Attendee-specific
2 | 
3 | attendee_id: &attendee_id YOUR_ID_HERE
4 | ec2_instance_label: *attendee_id
5 | s3_bucket_root: *attendee_id
6 | 
7 | aws_access_key: YOUR_ACCESS_KEY_ID_HERE
8 | aws_access_secret: YOUR_SECRET_ACCESS_KEY_HERE
9 | 
10 | # TODO run `python scripts/generate_key_pairs.py`, and fill this in!
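# For example, with attendee_id "jane_doe" and the default region list below,
# the script would print an entry like the following (hypothetical value;
# copy whatever the script actually outputs):
#
#   aws_key_pairs:
#     us-east-1: jane_doe_us-east-1
#
# The names follow the "{attendee_id}_{region}" pattern used by
# scripts/generate_key_pairs.py.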
11 | 12 | aws_key_pairs: 13 | us-east-1: YOUR_KEY_PAIR_NAME 14 | 15 | ## Bootcamp-specific 16 | docker_image: dementrock/deeprlbootcamp 17 | 18 | # Since we are using a public docker image, no need to log in 19 | docker_username: ~ 20 | docker_password: ~ 21 | docker_host: ~ 22 | 23 | s3_bucket: deeprlbootcamp 24 | aws_s3_region: us-east-1 25 | 26 | # Instance configuration 27 | aws_instance_type: c4.large 28 | aws_use_spot_instances: true 29 | aws_spot_price: 0.1 30 | 31 | aws_iam_instance_profile: attendee_instance_profile 32 | 33 | aws_image_id: 34 | ap-northeast-1: ami-a3c737c5 35 | ap-northeast-2: ami-8faa72e1 36 | ap-south-1: ami-65094c0a 37 | ap-southeast-1: ami-c6bfdba5 38 | ap-southeast-2: ami-597b603a 39 | ca-central-1: ami-60982604 40 | eu-central-1: ami-06933b69 41 | eu-west-1: ami-ba08f5c3 42 | eu-west-2: ami-13524277 43 | sa-east-1: ami-8be293e7 44 | us-east-1: ami-b0c2fecb 45 | us-east-2: ami-6f43600a 46 | us-west-1: ami-1ecce67e 47 | us-west-2: ami-999e72e1 48 | 49 | aws_regions: 50 | - us-east-1 51 | 52 | aws_security_groups: 53 | us-east-1: sg-55afed24 54 | 55 | aws_subnets: 56 | us-east-1: 57 | us-east-1a: subnet-55155d1d 58 | us-east-1b: subnet-5953da03 59 | us-east-1c: subnet-82fe18e6 60 | us-east-1d: subnet-7db42551 61 | us-east-1e: subnet-4b686877 62 | us-east-1f: subnet-2076172c 63 | 64 | ## Other default settings 65 | 66 | ec2_project_root: /home/ubuntu/code 67 | ec2_user: ubuntu 68 | 69 | ec2_terminate_machine: true 70 | 71 | s3_code_sync_ignores: 72 | - data/local 73 | - data/s3 74 | - build_lab3 75 | - build_lab4 76 | - build_lab5 77 | - bullet3 78 | - roboschool 79 | - private 80 | - pong_warm_start.pkl 81 | - dqn/replay_buffer.pkl 82 | - dqn/weights.pkl 83 | - Lab-Policy-Gradient-Algorithms 84 | - Lab-RL-in-the-Cloud 85 | - .git 86 | - .gitignore 87 | - .pods 88 | - .DS_Store 89 | - .idea 90 | - cloudexec.yml 91 | - __pycache__ 92 | 93 | s3_periodic_sync_interval: 15 94 | 95 | s3_periodic_sync_include_flags: "--include *progress.json --include *variant.json" 96 | 97 | debug: false 98 | -------------------------------------------------------------------------------- /labs/lab5/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! -f $xhost ]; then 16 | echo "xhost not found!" 
17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /labs/lab5/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 12 | $docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 13 | -v "$DIR":/root/code/bootcamp_pg \ 14 | -ti dementrock/deeprlbootcamp \ 15 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 16 | -------------------------------------------------------------------------------- /labs/lab5/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | channels: 3 | - menpo 4 | - soumith 5 | dependencies: 6 | - python==3.5.3 7 | - opencv3=3.1.0 8 | - numpy==1.13.1 9 | - scipy==0.19.1 10 | - pip: 11 | - gym==0.9.2 12 | - chainer==2.0.1 13 | - ipdb==0.10.3 14 | - tblib==1.3.2 15 | - atari_py==0.1.1 16 | - Pillow==4.2.1 17 | - PyOpenGL==3.1.0 18 | - cloudpickle==0.3.1 19 | - click==6.7 20 | - python-dateutil==2.6.1 21 | - pyyaml==3.12 22 | - easydict==1.7 23 | - boto3==1.4.4 24 | - mako==1.0.7 25 | - redis==2.10.5 26 | - Flask==0.12.2 27 | - plotly==2.0.12 28 | - tqdm==4.14.0 29 | - cupy==1.0.1; 'linux' in sys_platform 30 | - cached-property==1.3.0 31 | - h5py==2.7.0 32 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_a2c_breakout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import a2c 3 | from env_makers import EnvMaker 4 | from models import CategoricalCNNPolicy 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/a2c-breakout" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('BreakoutNoFrameskip-v4') 19 | env = env_maker.make() 20 | policy = CategoricalCNNPolicy( 21 | env.observation_space, env.action_space, env.spec) 22 | vf = policy.create_vf() 23 | a2c( 24 | env=env, 25 | env_maker=env_maker, 26 | n_envs=16, 27 | policy=policy, 28 | vf=vf, 29 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 30 | ) 31 | -------------------------------------------------------------------------------- 
/labs/lab5/experiments/run_a2c_pong.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import a2c 3 | from env_makers import EnvMaker 4 | from models import CategoricalCNNPolicy 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/a2c-pong" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('PongNoFrameskip-v4') 19 | env = env_maker.make() 20 | policy = CategoricalCNNPolicy( 21 | env.observation_space, env.action_space, env.spec) 22 | vf = policy.create_vf() 23 | a2c( 24 | env=env, 25 | env_maker=env_maker, 26 | n_envs=16, 27 | policy=policy, 28 | vf=vf, 29 | snapshot_saver=SnapshotSaver(log_dir, interval=10), 30 | ) 31 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_cloud_trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cloudexec 4 | import numpy as np 5 | from env_makers import EnvMaker 6 | from models import MLPBaseline, CategoricalMLPPolicy 7 | from algs import trpo 8 | from utils import SnapshotSaver 9 | import logger 10 | 11 | 12 | def run(v): 13 | np.random.seed(v['seed']) 14 | env_maker = EnvMaker('CartPole-v0') 15 | env = env_maker.make() 16 | policy = CategoricalMLPPolicy( 17 | observation_space=env.observation_space, 18 | action_space=env.action_space, 19 | env_spec=env.spec 20 | ) 21 | baseline = MLPBaseline( 22 | observation_space=env.observation_space, 23 | action_space=env.action_space, 24 | env_spec=env.spec 25 | ) 26 | trpo( 27 | env=env, 28 | env_maker=env_maker, 29 | n_envs=16, 30 | policy=policy, 31 | baseline=baseline, 32 | batch_size=2000, 33 | n_iters=100, 34 | snapshot_saver=SnapshotSaver(logger.get_dir()) 35 | ) 36 | 37 | 38 | cloudexec.remote_call( 39 | task=cloudexec.Task( 40 | run, 41 | variant=dict(seed=0), 42 | ), 43 | config=cloudexec.Config( 44 | exp_group="trpo-cartpole", 45 | ), 46 | mode=cloudexec.local_mode, 47 | ) 48 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_cloud_trpo_pendulum_baseline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cloudexec 4 | from cloudexec import VariantGenerator 5 | import numpy as np 6 | from env_makers import EnvMaker 7 | from models import MLPBaseline, TimeDependentBaseline, LinearFeatureBaseline 8 | from models import GaussianMLPPolicy 9 | from algs import trpo 10 | from utils import SnapshotSaver 11 | import chainer 12 | import logger 13 | 14 | 15 | def run(v): 16 | np.random.seed(v['seed']) 17 | env_maker = EnvMaker('Pendulum-v0') 18 | env = env_maker.make() 19 | policy = GaussianMLPPolicy( 20 | observation_space=env.observation_space, 21 | action_space=env.action_space, 22 | env_spec=env.spec, 23 | hidden_sizes=(64, 64), 24 | hidden_nonlinearity=chainer.functions.tanh, 25 | ) 26 | if v['baseline'] == 'mlp': 27 | baseline = MLPBaseline( 28 | observation_space=env.observation_space, 29 | action_space=env.action_space, 30 | env_spec=env.spec, 31 | hidden_sizes=(64, 64), 32 | hidden_nonlinearity=chainer.functions.tanh, 33 | ) 34 | elif v['baseline'] == 'time_dependent': 35 | baseline = TimeDependentBaseline( 36 | observation_space=env.observation_space, 37 | 
action_space=env.action_space, 38 | env_spec=env.spec, 39 | ) 40 | elif v['baseline'] == 'linear_feature': 41 | baseline = LinearFeatureBaseline( 42 | observation_space=env.observation_space, 43 | action_space=env.action_space, 44 | env_spec=env.spec, 45 | ) 46 | else: 47 | raise ValueError 48 | trpo( 49 | env=env, 50 | env_maker=env_maker, 51 | n_envs=16, 52 | policy=policy, 53 | baseline=baseline, 54 | batch_size=10000, 55 | n_iters=100, 56 | snapshot_saver=SnapshotSaver(logger.get_dir()), 57 | ) 58 | 59 | 60 | vg = VariantGenerator() 61 | vg.add("seed", [0, 100, 200]) 62 | vg.add("baseline", ['mlp', 'linear_feature', 'time_dependent']) 63 | 64 | for variant in vg.variants(): 65 | cloudexec.remote_call( 66 | task=cloudexec.Task( 67 | run, 68 | variant=variant, 69 | ), 70 | config=cloudexec.Config( 71 | exp_group="trpo-pendulum-baseline", 72 | ), 73 | mode=cloudexec.local_mode, 74 | ) 75 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_pg_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import chainer 3 | 4 | from algs import pg 5 | from env_makers import EnvMaker 6 | from models import CategoricalMLPPolicy, MLPBaseline 7 | from utils import SnapshotSaver 8 | import numpy as np 9 | import os 10 | import logger 11 | 12 | log_dir = "data/local/pg-cartpole" 13 | 14 | np.random.seed(42) 15 | 16 | # Clean up existing logs 17 | os.system("rm -rf {}".format(log_dir)) 18 | 19 | with logger.session(log_dir): 20 | env_maker = EnvMaker('CartPole-v0') 21 | env = env_maker.make() 22 | policy = CategoricalMLPPolicy(observation_space=env.observation_space, action_space=env.action_space, 23 | env_spec=env.spec) 24 | baseline = MLPBaseline(observation_space=env.observation_space, action_space=env.action_space, 25 | env_spec=env.spec) 26 | pg( 27 | env=env, 28 | env_maker=env_maker, 29 | n_envs=16, 30 | policy=policy, 31 | baseline=baseline, 32 | batch_size=2000, 33 | n_iters=100, 34 | snapshot_saver=SnapshotSaver(log_dir), 35 | optimizer=chainer.optimizers.Adam(1e-2) 36 | ) 37 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from algs import trpo 3 | from env_makers import EnvMaker 4 | from models import CategoricalMLPPolicy, MLPBaseline 5 | from utils import SnapshotSaver 6 | import numpy as np 7 | import os 8 | import logger 9 | 10 | log_dir = "data/local/trpo-cartpole" 11 | 12 | np.random.seed(42) 13 | 14 | # Clean up existing logs 15 | os.system("rm -rf {}".format(log_dir)) 16 | 17 | with logger.session(log_dir): 18 | env_maker = EnvMaker('CartPole-v0') 19 | env = env_maker.make() 20 | policy = CategoricalMLPPolicy( 21 | observation_space=env.observation_space, 22 | action_space=env.action_space, 23 | env_spec=env.spec 24 | ) 25 | baseline = MLPBaseline( 26 | observation_space=env.observation_space, 27 | action_space=env.action_space, 28 | env_spec=env.spec 29 | ) 30 | trpo( 31 | env=env, 32 | env_maker=env_maker, 33 | n_envs=16, 34 | policy=policy, 35 | baseline=baseline, 36 | batch_size=2000, 37 | n_iters=100, 38 | snapshot_saver=SnapshotSaver(log_dir) 39 | ) 40 | -------------------------------------------------------------------------------- /labs/lab5/experiments/run_trpo_half_cheetah.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python
2 | import chainer
3 | 
4 | from algs import trpo
5 | from env_makers import EnvMaker
6 | from models import GaussianMLPPolicy, MLPBaseline
7 | from utils import SnapshotSaver
8 | import numpy as np
9 | import os
10 | import logger
11 | 
12 | log_dir = "data/local/trpo-half-cheetah"
13 | 
14 | np.random.seed(42)
15 | 
16 | # Clean up existing logs
17 | os.system("rm -rf {}".format(log_dir))
18 | 
19 | with logger.session(log_dir):
20 |     env_maker = EnvMaker('RoboschoolHalfCheetah-v1')
21 |     env = env_maker.make()
22 |     policy = GaussianMLPPolicy(
23 |         observation_space=env.observation_space,
24 |         action_space=env.action_space,
25 |         env_spec=env.spec,
26 |         hidden_sizes=(256, 64),
27 |         hidden_nonlinearity=chainer.functions.tanh,
28 |     )
29 |     baseline = MLPBaseline(
30 |         observation_space=env.observation_space,
31 |         action_space=env.action_space,
32 |         env_spec=env.spec,
33 |         hidden_sizes=(256, 64),
34 |         hidden_nonlinearity=chainer.functions.tanh,
35 |     )
36 |     trpo(
37 |         env=env,
38 |         env_maker=env_maker,
39 |         n_envs=16,
40 |         policy=policy,
41 |         baseline=baseline,
42 |         batch_size=5000,
43 |         n_iters=5000,
44 |         snapshot_saver=SnapshotSaver(log_dir, interval=10),
45 |     )
46 | 
--------------------------------------------------------------------------------
/labs/lab5/experiments/run_trpo_pendulum.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import chainer
3 | 
4 | from algs import trpo
5 | from env_makers import EnvMaker
6 | from models import GaussianMLPPolicy, MLPBaseline
7 | from utils import SnapshotSaver
8 | import numpy as np
9 | import os
10 | import logger
11 | 
12 | log_dir = "data/local/trpo-pendulum"
13 | 
14 | np.random.seed(42)
15 | 
16 | # Clean up existing logs
17 | os.system("rm -rf {}".format(log_dir))
18 | 
19 | with logger.session(log_dir):
20 |     env_maker = EnvMaker('Pendulum-v0')
21 |     env = env_maker.make()
22 |     policy = GaussianMLPPolicy(
23 |         observation_space=env.observation_space,
24 |         action_space=env.action_space,
25 |         env_spec=env.spec,
26 |         hidden_sizes=(64, 64),
27 |         hidden_nonlinearity=chainer.functions.tanh,
28 |     )
29 |     baseline = MLPBaseline(
30 |         observation_space=env.observation_space,
31 |         action_space=env.action_space,
32 |         env_spec=env.spec,
33 |         hidden_sizes=(64, 64),
34 |         hidden_nonlinearity=chainer.functions.tanh,
35 |     )
36 |     trpo(
37 |         env=env,
38 |         env_maker=env_maker,
39 |         n_envs=16,
40 |         policy=policy,
41 |         baseline=baseline,
42 |         batch_size=10000,
43 |         n_iters=100,
44 |         snapshot_saver=SnapshotSaver(log_dir),
45 |     )
46 | 
--------------------------------------------------------------------------------
/labs/lab5/findport.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Usage: findport.py 3000 100
4 | #
5 | from __future__ import print_function
6 | import socket
7 | from contextlib import closing
8 | import sys
9 | 
10 | if len(sys.argv) != 3:
11 |     print("Usage: {} <base> <increment>".format(sys.argv[0]))
12 |     sys.exit(1)
13 | 
14 | base = int(sys.argv[1])
15 | increment = int(sys.argv[2])
16 | 
17 | 
18 | def find_free_port():
19 |     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
20 |         for port in range(base, 65536, increment):
21 |             try:
22 |                 s.bind(('', port))
23 |                 return s.getsockname()[1]
24 |             except socket.error:
25 |                 continue
26 | 
27 | 
28 | print(find_free_port())
29 | 
--------------------------------------------------------------------------------
/labs/lab5/launch_bg_screen_buffer.sh:
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!" 21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /labs/lab5/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | See README.md for a description of the logging API. 4 | 5 | OFF state corresponds to having Logger.CURRENT == Logger.DEFAULT 6 | ON state is otherwise 7 | 8 | """ 9 | import datetime 10 | from collections import OrderedDict 11 | import os 12 | import sys 13 | import shutil 14 | import os.path as osp 15 | import json 16 | 17 | import dateutil.tz 18 | 19 | LOG_OUTPUT_FORMATS = ['stdout', 'log', 'json'] 20 | 21 | DEBUG = 10 22 | INFO = 20 23 | WARN = 30 24 | ERROR = 40 25 | 26 | DISABLED = 50 27 | 28 | 29 | class OutputFormat(object): 30 | def writekvs(self, kvs): 31 | """ 32 | Write key-value pairs 33 | """ 34 | raise NotImplementedError 35 | 36 | def writeseq(self, args): 37 | """ 38 | Write a sequence of other data (e.g. a logging message) 39 | """ 40 | pass 41 | 42 | def close(self): 43 | return 44 | 45 | 46 | class HumanOutputFormat(OutputFormat): 47 | def __init__(self, file): 48 | self.file = file 49 | 50 | def writekvs(self, kvs): 51 | # Create strings for printing 52 | key2str = OrderedDict() 53 | for (key, val) in kvs.items(): 54 | valstr = '%-8.5g' % (val,) if hasattr(val, '__float__') else val 55 | key2str[self._truncate(key)] = self._truncate(valstr) 56 | 57 | # Find max widths 58 | keywidth = max(map(len, key2str.keys())) 59 | valwidth = max(map(len, key2str.values())) 60 | 61 | # Write out the data 62 | dashes = '-' * (keywidth + valwidth + 7) 63 | lines = [dashes] 64 | for (key, val) in key2str.items(): 65 | lines.append('| %s%s | %s%s |' % ( 66 | key, 67 | ' ' * (keywidth - len(key)), 68 | val, 69 | ' ' * (valwidth - len(val)), 70 | )) 71 | lines.append(dashes) 72 | self.file.write('\n'.join(lines) + '\n') 73 | 74 | # Flush the output to the file 75 | self.file.flush() 76 | 77 | def _truncate(self, s): 78 | return s[:20] + '...' 
if len(s) > 23 else s 79 | 80 | def writeseq(self, args): 81 | for arg in args: 82 | self.file.write(arg) 83 | self.file.write('\n') 84 | self.file.flush() 85 | 86 | 87 | class JSONOutputFormat(OutputFormat): 88 | def __init__(self, file): 89 | self.file = file 90 | 91 | def writekvs(self, kvs): 92 | for k, v in kvs.items(): 93 | if hasattr(v, 'dtype'): 94 | v = v.tolist() 95 | kvs[k] = float(v) 96 | self.file.write(json.dumps(kvs) + '\n') 97 | self.file.flush() 98 | 99 | 100 | def make_output_format(format, ev_dir): 101 | os.makedirs(ev_dir, exist_ok=True) 102 | if format == 'stdout': 103 | return HumanOutputFormat(sys.stdout) 104 | elif format == 'log': 105 | log_file = open(osp.join(ev_dir, 'log.txt'), 'at') 106 | return HumanOutputFormat(log_file) 107 | elif format == 'json': 108 | json_file = open(osp.join(ev_dir, 'progress.json'), 'at') 109 | return JSONOutputFormat(json_file) 110 | else: 111 | raise ValueError('Unknown format specified: %s' % (format,)) 112 | 113 | 114 | # ================================================================ 115 | # API 116 | # ================================================================ 117 | 118 | 119 | def logkv(key, val): 120 | """ 121 | Log a value of some diagnostic 122 | Call this once for each diagnostic quantity, each iteration 123 | """ 124 | Logger.CURRENT.logkv(key, val) 125 | 126 | 127 | def dumpkvs(): 128 | """ 129 | Write all of the diagnostics from the current iteration 130 | 131 | level: int. (see old_logger.py docs) If the global logger level is higher than 132 | the level argument here, don't print to stdout. 133 | """ 134 | Logger.CURRENT.dumpkvs() 135 | 136 | 137 | # for backwards compatibility 138 | record_tabular = logkv 139 | dump_tabular = dumpkvs 140 | 141 | 142 | def log(*args, level=INFO): 143 | """ 144 | Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). 145 | """ 146 | Logger.CURRENT.log(*args, level=level) 147 | 148 | 149 | def debug(*args): 150 | log(*args, level=DEBUG) 151 | 152 | 153 | def info(*args): 154 | log(*args, level=INFO) 155 | 156 | 157 | def warn(*args): 158 | log(*args, level=WARN) 159 | 160 | 161 | def error(*args): 162 | log(*args, level=ERROR) 163 | 164 | 165 | def set_level(level): 166 | """ 167 | Set logging threshold on current logger. 168 | """ 169 | Logger.CURRENT.set_level(level) 170 | 171 | 172 | def get_level(): 173 | """ 174 | Set logging threshold on current logger. 175 | """ 176 | return Logger.CURRENT.level 177 | 178 | 179 | def get_dir(): 180 | """ 181 | Get directory that log files are being written to. 182 | will be None if there is no output directory (i.e., if you didn't call start) 183 | """ 184 | return Logger.CURRENT.get_dir() 185 | 186 | 187 | def get_expt_dir(): 188 | sys.stderr.write( 189 | "get_expt_dir() is Deprecated. Switch to get_dir() [%s]\n" % (get_dir(),)) 190 | return get_dir() 191 | 192 | 193 | # ================================================================ 194 | # Backend 195 | # ================================================================ 196 | 197 | 198 | class Logger(object): 199 | # A logger with no output files. 
(See right below class definition) 200 | DEFAULT = None 201 | # So that you can still log to the terminal without setting up any output files 202 | CURRENT = None # Current logger being used by the free functions above 203 | 204 | def __init__(self, dir, output_formats): 205 | self.name2val = OrderedDict() # values this iteration 206 | self.level = INFO 207 | self.dir = dir 208 | self.output_formats = output_formats 209 | 210 | # Logging API, forwarded 211 | # ---------------------------------------- 212 | def logkv(self, key, val): 213 | self.name2val[key] = val 214 | 215 | def dumpkvs(self): 216 | for fmt in self.output_formats: 217 | fmt.writekvs(self.name2val) 218 | self.name2val.clear() 219 | 220 | def log(self, *args, level=INFO): 221 | now = datetime.datetime.now(dateutil.tz.tzlocal()) 222 | timestamp = now.strftime('[%Y-%m-%d %H:%M:%S.%f %Z] ') 223 | if self.level <= level: 224 | self._do_log((timestamp,) + args) 225 | 226 | # Configuration 227 | # ---------------------------------------- 228 | def set_level(self, level): 229 | self.level = level 230 | 231 | def get_dir(self): 232 | return self.dir 233 | 234 | def close(self): 235 | for fmt in self.output_formats: 236 | fmt.close() 237 | 238 | # Misc 239 | # ---------------------------------------- 240 | def _do_log(self, args): 241 | for fmt in self.output_formats: 242 | fmt.writeseq(args) 243 | 244 | 245 | # ================================================================ 246 | 247 | Logger.DEFAULT = Logger( 248 | output_formats=[HumanOutputFormat(sys.stdout)], dir=None) 249 | Logger.CURRENT = Logger.DEFAULT 250 | 251 | 252 | class session(object): 253 | """ 254 | Context manager that sets up the loggers for an experiment. 255 | """ 256 | 257 | CURRENT = None # Set to a LoggerContext object using enter/exit or context manager 258 | 259 | def __init__(self, dir, format_strs=None): 260 | self.dir = dir 261 | if format_strs is None: 262 | format_strs = LOG_OUTPUT_FORMATS 263 | output_formats = [make_output_format(f, dir) for f in format_strs] 264 | Logger.CURRENT = Logger(dir=dir, output_formats=output_formats) 265 | 266 | def __enter__(self): 267 | os.makedirs(self.evaluation_dir(), exist_ok=True) 268 | output_formats = [make_output_format( 269 | f, self.evaluation_dir()) for f in LOG_OUTPUT_FORMATS] 270 | Logger.CURRENT = Logger(dir=self.dir, output_formats=output_formats) 271 | 272 | def __exit__(self, *args): 273 | Logger.CURRENT.close() 274 | Logger.CURRENT = Logger.DEFAULT 275 | 276 | def evaluation_dir(self): 277 | return self.dir 278 | 279 | 280 | # ================================================================ 281 | 282 | 283 | def _demo(): 284 | info("hi") 285 | debug("shouldn't appear") 286 | set_level(DEBUG) 287 | debug("should appear") 288 | dir = "/tmp/testlogging" 289 | if os.path.exists(dir): 290 | shutil.rmtree(dir) 291 | with session(dir=dir): 292 | record_tabular("a", 3) 293 | record_tabular("b", 2.5) 294 | dump_tabular() 295 | record_tabular("b", -2.5) 296 | record_tabular("a", 5.5) 297 | dump_tabular() 298 | info("^^^ should see a = 5.5") 299 | 300 | record_tabular("b", -2.5) 301 | dump_tabular() 302 | 303 | record_tabular("a", "longasslongasslongasslongasslongasslongassvalue") 304 | dump_tabular() 305 | 306 | 307 | if __name__ == "__main__": 308 | _demo() 309 | -------------------------------------------------------------------------------- /labs/lab5/pg.py: -------------------------------------------------------------------------------- 1 | from alg_utils import * 2 | from simplepg.simple_utils import 
test_once, nprs
3 | import tests.pg_tests
4 | 
5 | 
6 | def pg(env, env_maker, policy, baseline, n_envs=mp.cpu_count(), last_iter=-1, n_iters=100, batch_size=1000,
7 |        optimizer=chainer.optimizers.Adam(), discount=0.99, gae_lambda=0.97, snapshot_saver=None):
8 |     """
9 |     This method implements the vanilla policy gradient algorithm.
10 |     :param env: An environment instance, which should have the same class as what env_maker.make() returns.
11 |     :param env_maker: An object such that calling env_maker.make() will generate a new environment.
12 |     :param policy: A stochastic policy which we will be optimizing.
13 |     :param baseline: A baseline used for variance reduction and estimating future returns for unfinished trajectories.
14 |     :param n_envs: Number of environments running simultaneously.
15 |     :param last_iter: The index of the last iteration. This is normally -1 when starting afresh, but may be different when
16 |         loaded from a snapshot.
17 |     :param n_iters: The total number of iterations to run.
18 |     :param batch_size: The number of samples used per iteration.
19 |     :param optimizer: A Chainer optimizer instance. By default we use the Adam algorithm with learning rate 1e-3.
20 |     :param discount: Discount factor.
21 |     :param gae_lambda: Lambda parameter used for generalized advantage estimation.
22 |     :param snapshot_saver: An object for saving snapshots.
23 |     """
24 | 
25 |     if getattr(optimizer, 'target', None) is not policy:
26 |         optimizer.setup(policy)
27 | 
28 |     logger.info("Starting env pool")
29 |     with EnvPool(env_maker, n_envs=n_envs) as env_pool:
30 |         for iter in range(last_iter + 1, n_iters):
31 |             logger.info("Starting iteration {}".format(iter))
32 |             logger.logkv('Iteration', iter)
33 | 
34 |             logger.info("Start collecting samples")
35 |             trajs = parallel_collect_samples(env_pool, policy, batch_size)
36 | 
37 |             logger.info("Computing input variables for policy optimization")
38 |             all_obs, all_acts, all_advs, _ = compute_pg_vars(
39 |                 trajs, policy, baseline, discount, gae_lambda
40 |             )
41 | 
42 |             # Begin policy update
43 | 
44 |             # Now, you need to implement the computation of the policy gradient
45 |             # The policy gradient is given by -1/T \sum_t \nabla_\theta(log(p_\theta(a_t|s_t))) * A_t
46 |             # Note the negative sign in front, since optimizers are most often minimizing a loss rather than maximizing an objective
47 |             # This is the same as \nabla_\theta(-1/T \sum_t log(p_\theta(a_t|s_t)) * A_t) = \nabla_\theta(L), where L is the surrogate loss term
48 | 
49 |             logger.info("Computing policy gradient")
50 | 
51 |             # Methods that may be useful:
52 |             # - `dists.logli(actions)' returns the log probability of the actions under the distribution `dists'.
53 |             #   This method returns a chainer variable.
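            # As a quick sanity check (a sketch mirroring tests/pg_tests.py, which
            # registers this exact setup), compute_surr_loss can be evaluated on
            # synthetic inputs and should return a scalar chainer Variable:
            #
            #     from utils import Gaussian
            #     dists = Gaussian(
            #         means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)),
            #         log_stds=Variable(nprs(1).uniform(size=(10, 3)).astype(np.float32)))
            #     acts = Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32))
            #     advs = Variable(nprs(3).uniform(size=(10,)).astype(np.float32))
            #     compute_surr_loss(dists, acts, advs)  # -> scalar Variable
            #
            # test_once below runs the registered version of this check automatically.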
54 | 
55 |             dists = policy.compute_dists(all_obs)
56 | 
57 |             def compute_surr_loss(dists, all_acts, all_advs):
58 |                 """
59 |                 :param dists: An instance of subclass of Distribution
60 |                 :param all_acts: A chainer variable, which should be a matrix of size N * |A|
61 |                 :param all_advs: A chainer variable, which should be a vector of size N
62 |                 :return: A chainer variable, which should be a scalar
63 |                 """
65 |                 logli = dists.logli(all_acts)
66 |                 surr_loss = -F.mean(logli * all_advs)
68 |                 return surr_loss
69 | 
70 |             test_once(compute_surr_loss)
71 | 
72 |             surr_loss = compute_surr_loss(dists, all_acts, all_advs)
73 | 
74 |             # reset gradients stored in the policy parameters
75 |             policy.cleargrads()
76 |             surr_loss.backward()
77 | 
78 |             # apply the computed gradient
79 |             optimizer.update()
80 | 
81 |             # Update baseline
82 |             logger.info("Updating baseline")
83 |             baseline.update(trajs)
84 | 
85 |             # log statistics
86 |             logger.info("Computing logging information")
87 |             logger.logkv('SurrLoss', surr_loss.data)
88 |             log_action_distribution_statistics(dists)
89 |             log_reward_statistics(env)
90 |             log_baseline_statistics(trajs)
91 |             logger.dumpkvs()
92 | 
93 |             if snapshot_saver is not None:
94 |                 logger.info("Saving snapshot")
95 |                 snapshot_saver.save_state(
96 |                     iter,
97 |                     dict(
98 |                         alg=pg,
99 |                         alg_state=dict(
100 |                             env_maker=env_maker,
101 |                             policy=policy,
102 |                             baseline=baseline,
103 |                             n_envs=n_envs,
104 |                             last_iter=iter,
105 |                             n_iters=n_iters,
106 |                             batch_size=batch_size,
107 |                             optimizer=optimizer,
108 |                             discount=discount,
109 |                             gae_lambda=gae_lambda
110 |                         )
111 |                     )
112 |                 )
113 | 
--------------------------------------------------------------------------------
/labs/lab5/scripts/generate_key_pairs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from cloudexec import get_cloudexec_config, get_project_root
3 | import boto3
4 | import botocore.exceptions
5 | import os
6 | 
7 | if __name__ == "__main__":
8 |     config = get_cloudexec_config()
9 | 
10 |     key_names = dict()
11 | 
12 |     for region in config.aws_regions:
13 |         ec2_client = boto3.client(
14 |             "ec2",
15 |             region_name=region,
16 |             aws_access_key_id=config.aws_access_key,
17 |             aws_secret_access_key=config.aws_access_secret,
18 |         )
19 | 
20 |         key_name = "{attendee_id}_{region}".format(
21 |             attendee_id=config.attendee_id, region=region)
22 | 
23 |         key_names[region] = key_name
24 | 
25 |         print("Trying to create key pair with name %s" % key_name)
26 |         import cloudexec
27 |         file_name = cloudexec.local_ec2_key_pair_path(key_name)
28 | 
29 |         try:
30 |             key_pair = ec2_client.create_key_pair(KeyName=key_name)
31 |         except botocore.exceptions.ClientError as e:
32 |             if e.response['Error']['Code'] == 'InvalidKeyPair.Duplicate':
33 |                 if os.path.exists(file_name):
34 |                     print("Key pair with name {key_name} already exists.".format(
35 |                         key_name=key_name))
36 |                 else:
37 |                     print(
38 |                         "Key pair with name {key_name} exists remotely, but not locally!
To fix this, " 39 | "delete the remote one first".format(key_name=key_name)) 40 | continue 41 | else: 42 | raise e 43 | 44 | print("Saving key pair file") 45 | os.makedirs(os.path.dirname(file_name), exist_ok=True) 46 | with os.fdopen(os.open(file_name, os.O_WRONLY | os.O_CREAT, 0o600), 'w') as handle: 47 | handle.write(key_pair['KeyMaterial'] + '\n') 48 | 49 | print("All set!") 50 | print("Now, edit your cloudexec.yml file, and update the `aws_key_pairs` entry to the following:") 51 | 52 | print() 53 | print("aws_key_pairs:") 54 | for region in config.aws_regions: 55 | print(" - {region}: {key_name}".format(region=region, 56 | key_name=key_names[region])) 57 | -------------------------------------------------------------------------------- /labs/lab5/scripts/resume_training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import click 4 | import logger 5 | 6 | 7 | @click.command() 8 | @click.argument("dir") # , "Directory which contains snapshot files") 9 | @click.option("--interval", help="Interval between saving snapshots", type=int, default=10) 10 | def main(dir, interval): 11 | with logger.session(dir): 12 | saver = SnapshotSaver(dir, interval=interval) 13 | state = saver.get_state() 14 | alg_state = state['alg_state'] 15 | env = alg_state['env_maker'].make() 16 | alg = state['alg'] 17 | alg(env=env, snapshot_saver=saver, **alg_state) 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | -------------------------------------------------------------------------------- /labs/lab5/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 
43 | -------------------------------------------------------------------------------- /labs/lab5/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from utils import SnapshotSaver 3 | import click 4 | import time 5 | import os 6 | 7 | 8 | @click.command() 9 | @click.argument("dir") 10 | def main(dir): 11 | env = None 12 | while True: 13 | saver = SnapshotSaver(dir) 14 | state = saver.get_state() 15 | if state is None: 16 | time.sleep(1) 17 | continue 18 | alg_state = state['alg_state'] 19 | if env is None: 20 | env = alg_state['env_maker'].make() 21 | policy = alg_state['policy'] 22 | ob = env.reset() 23 | done = False 24 | while not done: 25 | action, _ = policy.get_action(ob) 26 | ob, _, done, _ = env.step(action) 27 | env.render() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /labs/lab5/scripts/sync_s3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cloudexec 4 | import os 5 | import argparse 6 | import subprocess 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('folder', type=str) 11 | parser.add_argument('--all', action='store_true', default=False) 12 | args = parser.parse_args() 13 | remote_dir = "s3://{bucket}/{bucket_root}/experiments".format( 14 | bucket=cloudexec.get_cloudexec_config().s3_bucket, 15 | bucket_root=cloudexec.get_cloudexec_config().s3_bucket_root 16 | ) 17 | local_dir = os.path.join(cloudexec.get_project_root(), "data", "s3") 18 | if args.folder: 19 | remote_dir = os.path.join(remote_dir, args.folder) 20 | local_dir = os.path.join(local_dir, args.folder) 21 | s3_env = dict( 22 | os.environ, 23 | AWS_ACCESS_KEY_ID=cloudexec.get_cloudexec_config().aws_access_key, 24 | AWS_SECRET_ACCESS_KEY=cloudexec.get_cloudexec_config().aws_access_secret, 25 | AWS_REGION=cloudexec.get_cloudexec_config().aws_s3_region, 26 | ) 27 | if not args.all: 28 | command = (""" 29 | aws s3 sync --exclude '*' {s3_periodic_sync_include_flags} --content-type "UTF-8" {remote_dir} {local_dir} 30 | """.format(local_dir=local_dir, remote_dir=remote_dir, 31 | s3_periodic_sync_include_flags=cloudexec.get_cloudexec_config().s3_periodic_sync_include_flags)) 32 | else: 33 | command = (""" 34 | aws s3 sync --content-type "UTF-8" {remote_dir} {local_dir} 35 | """.format(local_dir=local_dir, remote_dir=remote_dir)) 36 | subprocess.check_call(command, shell=True, env=s3_env) 37 | -------------------------------------------------------------------------------- /labs/lab5/scripts/test_ec2_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import cloudexec 6 | import boto3 7 | import botocore.exceptions 8 | import os 9 | import subprocess 10 | config = cloudexec.get_cloudexec_config() 11 | 12 | assert len({ 13 | config.attendee_id, 14 | config.ec2_instance_label, 15 | config.s3_bucket_root 16 | }) == 1, "attendee_id, ec2_instance_label, s3_bucket_root should have the same value" 17 | 18 | print("Testing attendee_id, aws_access_key, and aws_access_secret...") 19 | 20 | iam_client = boto3.client( 21 | "iam", 22 | region_name=config.aws_regions[0], 23 | aws_access_key_id=config.aws_access_key, 24 | aws_secret_access_key=config.aws_access_secret, 25 | ) 26 | try: 27 | iam_client.list_access_keys(UserName=config.attendee_id) 
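    # NOTE: list_access_keys serves purely as a cheap authenticated probe here;
    # the ClientError code it fails with (handled in the branches below) tells
    # us which of the three credentials is misconfigured.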
28 | except botocore.exceptions.ClientError as e: 29 | if e.response['Error']['Code'] == 'InvalidClientTokenId': 30 | print("aws_access_key is not set properly!") 31 | exit() 32 | elif e.response['Error']['Code'] == 'SignatureDoesNotMatch': 33 | print("aws_access_secret is not set properly!") 34 | exit() 35 | elif e.response['Error']['Code'] == 'AccessDenied': 36 | print("attendee_id is not set properly!") 37 | exit() 38 | else: 39 | raise e 40 | 41 | # Check if key pair exists 42 | 43 | for region in config.aws_regions: 44 | print("Checking key pair in region %s" % region) 45 | if region not in config.aws_key_pairs: 46 | print("Key pair in region %s is not set properly!" % region) 47 | exit() 48 | key_pair_name = config.aws_key_pairs[region] 49 | key_pair_path = cloudexec.local_ec2_key_pair_path(key_pair_name) 50 | if not os.path.exists(key_pair_path): 51 | print("Missing local key pair file at %s" % key_pair_path) 52 | exit() 53 | ec2_client = boto3.client( 54 | "ec2", 55 | region_name=region, 56 | aws_access_key_id=config.aws_access_key, 57 | aws_secret_access_key=config.aws_access_secret, 58 | ) 59 | try: 60 | response = ec2_client.describe_key_pairs( 61 | KeyNames=[config.aws_key_pairs[region]] 62 | ) 63 | except botocore.exceptions.ClientError as e: 64 | if e.response['Error']['Code'] == 'InvalidKeyPair.NotFound': 65 | print("Key pair in region %s is not set properly!" % region) 66 | exit() 67 | else: 68 | raise e 69 | remote_fingerprint = response['KeyPairs'][0]['KeyFingerprint'] 70 | 71 | # Get local key fingerprint 72 | 73 | ps = subprocess.Popen( 74 | ["openssl", "pkcs8", "-in", key_pair_path, 75 | "-nocrypt", "-topk8", "-outform", "DER"], 76 | stdout=subprocess.PIPE 77 | ) 78 | local_fingerprint = subprocess.check_output( 79 | ["openssl", "sha1", "-c"], stdin=ps.stdout) 80 | # Strip irrelevant information 81 | local_fingerprint = local_fingerprint.decode().split('= ')[-1][:-1] 82 | 83 | if remote_fingerprint != local_fingerprint: 84 | print("Local key pair file does not match EC2 record!") 85 | exit() 86 | 87 | print("Your EC2 configuration has passed all checks!") 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /labs/lab5/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import roboschool 6 | import gym 7 | import chainer 8 | env = gym.make('CartPole-v0') 9 | env.reset() 10 | env.step(env.action_space.sample()) 11 | env = gym.make('RoboschoolHalfCheetah-v1') 12 | env.reset() 13 | env.step(env.action_space.sample()) 14 | print("Your environment has been successfully set up!") 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /labs/lab5/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | from gym import Env 2 | from gym.envs.registration import register 3 | from gym.utils import seeding 4 | from gym import spaces 5 | from gym.envs.classic_control.cartpole import CartPoleEnv 6 | import numpy as np 7 | 8 | 9 | class PointEnv(Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 50 13 | } 14 | 15 | def __init__(self): 16 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 17 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 18 | 19 | self._seed() 20 | 
self.viewer = None 21 | self.state = None 22 | 23 | def _seed(self, seed=None): 24 | self.np_random, seed = seeding.np_random(seed) 25 | return [seed] 26 | 27 | def _step(self, action): 28 | action = np.clip(action, -0.025, 0.025) 29 | self.state = np.clip(self.state + action, -1, 1) 30 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 31 | 32 | def _reset(self): 33 | while True: 34 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 35 | # Sample states that are far away 36 | if np.linalg.norm(self.state) > 0.9: 37 | break 38 | return np.array(self.state) 39 | 40 | # def _render(self, mode='human', close=False): 41 | # pass 42 | 43 | def _render(self, mode='human', close=False): 44 | if close: 45 | if self.viewer is not None: 46 | self.viewer.close() 47 | self.viewer = None 48 | return 49 | 50 | screen_width = 800 51 | screen_height = 800 52 | 53 | if self.viewer is None: 54 | from gym.envs.classic_control import rendering 55 | self.viewer = rendering.Viewer(screen_width, screen_height) 56 | 57 | agent = rendering.make_circle( 58 | min(screen_height, screen_width) * 0.03) 59 | origin = rendering.make_circle( 60 | min(screen_height, screen_width) * 0.03) 61 | trans = rendering.Transform(translation=(0, 0)) 62 | agent.add_attr(trans) 63 | self.trans = trans 64 | agent.set_color(1, 0, 0) 65 | origin.set_color(0, 0, 0) 66 | origin.add_attr(rendering.Transform( 67 | translation=(screen_width // 2, screen_height // 2))) 68 | self.viewer.add_geom(agent) 69 | self.viewer.add_geom(origin) 70 | 71 | # self.trans.set_translation(0, 0) 72 | self.trans.set_translation( 73 | (self.state[0] + 1) / 2 * screen_width, 74 | (self.state[1] + 1) / 2 * screen_height, 75 | ) 76 | 77 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 78 | 79 | 80 | register( 81 | 'Point-v0', 82 | entry_point='simplepg.point_env:PointEnv', 83 | timestep_limit=40, 84 | ) 85 | -------------------------------------------------------------------------------- /labs/lab5/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import click 3 | import numpy as np 4 | import gym 5 | 6 | from simplepg.simple_utils import include_bias, weighted_sample 7 | 8 | 9 | def point_get_action(theta, ob, rng=np.random): 10 | ob_1 = include_bias(ob) 11 | mean = theta.dot(ob_1) 12 | return rng.normal(loc=mean, scale=1.) 
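# For intuition, a small worked example (hypothetical numbers): with obs_dim = 2,
# theta has shape (2, 3) because include_bias appends a constant 1.0 feature, so
# the mean action is an affine function of the observation.
#
#     theta = np.zeros((2, 3))
#     ob = np.array([0.5, -0.5])
#     point_get_action(theta, ob, rng=np.random.RandomState(0))
#     # draws from N(mean=[0, 0], std=1), e.g. array([1.7641, 0.4002])
#
# cartpole_get_action below instead treats theta.dot(ob_1) as logits over the
# discrete actions.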
13 | 14 | 15 | def cartpole_get_action(theta, ob, rng=np.random): 16 | ob_1 = include_bias(ob) 17 | logits = ob_1.dot(theta.T) 18 | return weighted_sample(logits, rng=rng) 19 | 20 | 21 | @click.command() 22 | @click.argument("env_id", type=str, default="Point-v0") 23 | def main(env_id): 24 | # Register the environment 25 | rng = np.random.RandomState(42) 26 | 27 | if env_id == 'CartPole-v0': 28 | env = gym.make('CartPole-v0') 29 | get_action = cartpole_get_action 30 | obs_dim = env.observation_space.shape[0] 31 | action_dim = env.action_space.n 32 | elif env_id == 'Point-v0': 33 | from simplepg import point_env 34 | env = gym.make('Point-v0') 35 | get_action = point_get_action 36 | obs_dim = env.observation_space.shape[0] 37 | action_dim = env.action_space.shape[0] 38 | else: 39 | raise ValueError( 40 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 41 | 42 | env.seed(42) 43 | 44 | # Initialize parameters 45 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 46 | 47 | while True: 48 | ob = env.reset() 49 | done = False 50 | # Only render the first trajectory 51 | # Collect a new trajectory 52 | rewards = [] 53 | while not done: 54 | action = get_action(theta, ob, rng=rng) 55 | next_ob, rew, done, _ = env.step(action) 56 | ob = next_ob 57 | env.render() 58 | rewards.append(rew) 59 | 60 | print("Episode reward: %.2f" % np.sum(rewards)) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /labs/lab5/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.special 3 | import chainer 4 | 5 | 6 | # Compute gradient approximately using finite difference 7 | def numerical_grad(f, x, eps=1e-8): 8 | grad = np.zeros_like(x) 9 | for i in range(len(x)): 10 | xplus = np.array(x) 11 | xplus[i] += eps 12 | fplus = f(xplus) 13 | xminus = np.array(x) 14 | xminus[i] -= eps 15 | fminus = f(xminus) 16 | grad[i] = (fplus - fminus) / (2 * eps) 17 | return grad 18 | 19 | 20 | def gradient_check(f, g, x): 21 | # Test the implementation of g(x) = df/dx 22 | # Perform numerical differentiation and test it 23 | g_num = numerical_grad(f, x) 24 | g_test = g(x) 25 | try: 26 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 27 | print("Gradient check passed!") 28 | except AssertionError as e: 29 | print(e) 30 | print("Error: Gradient check didn't pass!") 31 | exit() 32 | 33 | 34 | def log_softmax(logits): 35 | return logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 36 | 37 | 38 | def softmax(logits): 39 | x = logits 40 | x = x - np.max(x, axis=-1, keepdims=True) 41 | x = np.exp(x) 42 | return x / np.sum(x, axis=-1, keepdims=True) 43 | 44 | 45 | def weighted_sample(logits, rng=np.random): 46 | weights = softmax(logits) 47 | return min( 48 | int(np.sum(rng.uniform() > np.cumsum(weights))), 49 | len(weights) - 1 50 | ) 51 | 52 | 53 | def include_bias(x): 54 | # Add a constant term (1.0) to each entry in x 55 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 56 | 57 | 58 | _tested = set() 59 | _tests = dict() 60 | 61 | nprs = np.random.RandomState 62 | 63 | 64 | def register_test(fn_name, kwargs, desired_output=None): 65 | assert fn_name not in _tests 66 | _tests[fn_name] = (kwargs, desired_output) 67 | 68 | 69 | def assert_allclose(a, b): 70 | if isinstance(a, (np.ndarray, float, int)): 71 | np.testing.assert_allclose(a, b, rtol=1e-5) 72 | elif isinstance(a, (tuple, list)): 73 | 
assert isinstance(b, (tuple, list)) 74 | assert len(a) == len(b) 75 | for a_i, b_i in zip(a, b): 76 | assert_allclose(a_i, b_i) 77 | elif isinstance(a, chainer.Variable): 78 | assert isinstance(b, chainer.Variable) 79 | assert_allclose(a.data, b.data) 80 | else: 81 | raise NotImplementedError 82 | 83 | 84 | def test_once(fn): 85 | module = fn.__module__ 86 | name = fn.__name__ 87 | key = module + "." + name 88 | if key in _tested: 89 | return 90 | assert key in _tests, "Test for %s not found!" % key 91 | kwargs, desired_output = _tests[key] 92 | _tested.add(key) 93 | 94 | if callable(kwargs): 95 | kwargs = kwargs() 96 | 97 | if callable(desired_output): 98 | desired_output = desired_output() 99 | 100 | if desired_output is None: 101 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 102 | exit() 103 | else: 104 | try: 105 | output = fn(**kwargs) 106 | assert_allclose(desired_output, output) 107 | print("Test for %s passed!" % key) 108 | except AssertionError as e: 109 | print(e) 110 | print("Error: test for %s didn't pass!" % key) 111 | exit() 112 | -------------------------------------------------------------------------------- /labs/lab5/tests/a2c_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | from chainer import Variable 4 | 5 | register_test( 6 | "a2c.compute_returns_advantages", 7 | kwargs=lambda: dict( 8 | rewards=nprs(0).uniform(size=(5, 2)), 9 | dones=nprs(1).choice([True, False], size=(5, 2)), 10 | values=nprs(2).uniform(size=(5, 2)), 11 | next_values=nprs(3).uniform(size=(2,)), 12 | discount=0.99, 13 | ), 14 | desired_output=lambda: ( 15 | np.array([[1.14554925, 1.25462372], 16 | [0.60276338, 0.54488318], 17 | [2.33579066, 1.90456042], 18 | [1.93145037, 1.2713801], 19 | [1.50895268, 0.38344152]]), 20 | np.array([[0.70955434, 1.22869749], 21 | [0.0531009, 0.10956079], 22 | [1.91542286, 1.5742256], 23 | [1.72680173, 0.65210914], 24 | [1.20929801, 0.11661424]]) 25 | ) 26 | ) 27 | 28 | register_test( 29 | "a2c.compute_total_loss", 30 | kwargs=lambda: dict( 31 | logli=Variable(nprs(0).uniform(size=(10,)).astype(np.float32)), 32 | all_advs=Variable(nprs(1).uniform(size=(10,)).astype(np.float32)), 33 | ent_coeff=nprs(2).uniform(), 34 | ent=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 35 | vf_loss_coeff=nprs(4).uniform(), 36 | all_returns=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 37 | all_values=Variable(nprs(6).uniform(size=(10,)).astype(np.float32)), 38 | ), 39 | desired_output=lambda: ( 40 | Variable(np.array(-0.4047563076019287, dtype=np.float32)), 41 | Variable(np.array(0.22883716225624084, dtype=np.float32)), 42 | Variable(np.array(-0.1834639459848404, dtype=np.float32)) 43 | ) 44 | ) 45 | -------------------------------------------------------------------------------- /labs/lab5/tests/pg_tests.py: -------------------------------------------------------------------------------- 1 | from chainer import Variable 2 | 3 | from simplepg.simple_utils import register_test, nprs 4 | from utils import Gaussian 5 | import numpy as np 6 | 7 | register_test( 8 | "pg.compute_surr_loss", 9 | kwargs=lambda: dict( 10 | dists=Gaussian( 11 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 12 | log_stds=Variable(nprs(1).uniform( 13 | size=(10, 3)).astype(np.float32)), 14 | ), 15 | all_acts=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 16 | all_advs=Variable(nprs(3).uniform(size=(10,)).astype(np.float32)), 
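        # nprs(k) is np.random.RandomState(k) (see simplepg.simple_utils), so
        # the inputs above are fully deterministic and pg.compute_surr_loss
        # must reproduce the scalar recorded in desired_output below.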
17 | ), 18 | desired_output=lambda: Variable( 19 | np.array(1.9201269149780273, dtype=np.float32)) 20 | ) 21 | -------------------------------------------------------------------------------- /labs/lab5/tests/simplepg_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | 4 | register_test( 5 | "__main__.compute_update", 6 | kwargs=lambda: dict( 7 | discount=0.99, 8 | R_tplus1=1.0, 9 | theta=nprs(0).uniform(size=(2, 2)), 10 | s_t=nprs(1).uniform(size=(1,)), 11 | a_t=nprs(2).choice(2), 12 | r_t=nprs(3).uniform(), 13 | b_t=nprs(4).uniform(), 14 | get_grad_logp_action=lambda theta, *_: theta * 2 15 | ), 16 | desired_output=lambda: ( 17 | 1.5407979025745755, 18 | np.array([[0.62978332, 0.82070564], [0.69169275, 0.62527314]]) 19 | ) 20 | ) 21 | 22 | register_test( 23 | "__main__.compute_baselines", 24 | kwargs=lambda: dict( 25 | all_returns=[ 26 | nprs(0).uniform(size=(10,)), 27 | nprs(1).uniform(size=(20,)), 28 | [], 29 | ], 30 | ), 31 | desired_output=lambda: np.array([0.61576628, 0.36728075, 0.]) 32 | ) 33 | 34 | register_test( 35 | "__main__.compute_fisher_matrix", 36 | kwargs=lambda: dict( 37 | theta=nprs(1).uniform(size=(2, 2)), 38 | get_grad_logp_action=lambda theta, ob, action: np.exp( 39 | theta) * np.linalg.norm(action), 40 | all_observations=list(nprs(2).uniform(size=(5, 1))), 41 | all_actions=list(nprs(3).choice(2, size=(5,))), 42 | ), 43 | desired_output=lambda: np.array([[0.92104469, 1.24739299, 0.60704379, 0.82124306], 44 | [1.24739299, 1.68937435, 45 | 0.82213401, 1.11222925], 46 | [0.60704379, 0.82213401, 47 | 0.40009151, 0.54126635], 48 | [0.82124306, 1.11222925, 0.54126635, 0.73225564]]) 49 | ) 50 | 51 | register_test( 52 | "__main__.compute_natural_gradient", 53 | kwargs=lambda: dict( 54 | F=nprs(0).uniform(size=(4, 4)), 55 | grad=nprs(1).uniform(size=(2, 2)), 56 | reg=1e-3, 57 | ), 58 | desired_output=lambda: np.array( 59 | [[-0.44691565, 0.5477328], [-0.20366472, 0.72267091]]) 60 | ) 61 | 62 | register_test( 63 | "__main__.compute_step_size", 64 | kwargs=lambda: dict( 65 | F=nprs(0).uniform(size=(2, 2)), 66 | natural_grad=nprs(1).uniform(size=(1, 2)), 67 | natural_step_size=1e-2, 68 | ), 69 | desired_output=lambda: 0.1607407366467048, 70 | ) 71 | -------------------------------------------------------------------------------- /labs/lab5/tests/trpo_tests.py: -------------------------------------------------------------------------------- 1 | from simplepg.simple_utils import register_test, nprs 2 | import numpy as np 3 | from chainer import Variable 4 | 5 | from utils import Gaussian 6 | 7 | register_test( 8 | "trpo.compute_surr_loss", 9 | kwargs=lambda: dict( 10 | old_dists=Gaussian( 11 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 12 | log_stds=Variable(nprs(1).uniform( 13 | size=(10, 3)).astype(np.float32)), 14 | ), 15 | new_dists=Gaussian( 16 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 17 | log_stds=Variable(nprs(3).uniform( 18 | size=(10, 3)).astype(np.float32)), 19 | ), 20 | all_acts=Variable(nprs(4).uniform(size=(10, 3)).astype(np.float32)), 21 | all_advs=Variable(nprs(5).uniform(size=(10,)).astype(np.float32)), 22 | ), 23 | desired_output=lambda: Variable( 24 | np.array(-0.5629823207855225, dtype=np.float32)) 25 | ) 26 | 27 | register_test( 28 | "trpo.compute_kl", 29 | kwargs=lambda: dict( 30 | old_dists=Gaussian( 31 | means=Variable(nprs(0).uniform(size=(10, 3)).astype(np.float32)), 32 | 
log_stds=Variable(nprs(1).uniform( 33 | size=(10, 3)).astype(np.float32)), 34 | ), 35 | new_dists=Gaussian( 36 | means=Variable(nprs(2).uniform(size=(10, 3)).astype(np.float32)), 37 | log_stds=Variable(nprs(3).uniform( 38 | size=(10, 3)).astype(np.float32)), 39 | ), 40 | ), 41 | desired_output=lambda: Variable( 42 | np.array(0.5306503176689148, dtype=np.float32)) 43 | ) 44 | -------------------------------------------------------------------------------- /labs/lab5/viskit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/lab5/viskit/__init__.py -------------------------------------------------------------------------------- /labs/lab5/viskit/core.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import json 4 | import os 5 | 6 | import numpy as np 7 | 8 | 9 | # from sandbox.rocky.utils.py_utils import AttrDict 10 | 11 | class AttrDict(dict): 12 | def __init__(self, *args, **kwargs): 13 | super(AttrDict, self).__init__(*args, **kwargs) 14 | self.__dict__ = self 15 | 16 | 17 | def unique(l): 18 | return list(set(l)) 19 | 20 | 21 | def flatten(l): 22 | return [item for sublist in l for item in sublist] 23 | 24 | 25 | def load_progress(progress_json_path, verbose=True): 26 | if verbose: 27 | print("Reading %s" % progress_json_path) 28 | entries = dict() 29 | rows = [] 30 | with open(progress_json_path, 'r') as f: 31 | lines = f.read().split('\n') 32 | for line in lines: 33 | if len(line) > 0: 34 | row = json.loads(line) 35 | rows.append(row) 36 | all_keys = set(k for row in rows for k in row.keys()) 37 | for k in all_keys: 38 | if k not in entries: 39 | entries[k] = [] 40 | for row in rows: 41 | if k in row: 42 | v = row[k] 43 | try: 44 | entries[k].append(float(v)) 45 | except: 46 | entries[k].append(np.nan) 47 | else: 48 | entries[k].append(np.nan) 49 | 50 | # entries[key] = [row.get(key, np.nan) for row in rows] 51 | # added_keys = set() 52 | # for k, v in row.items(): 53 | # if k not in entries: 54 | # entries[k] = [] 55 | # try: 56 | # entries[k].append(float(v)) 57 | # except: 58 | # entries[k].append(0.) 59 | # added_keys.add(k) 60 | # for k in entries.keys(): 61 | # if k not in added_keys: 62 | # entries[k].append(np.nan) 63 | entries = dict([(k, np.array(v)) for k, v in entries.items()]) 64 | return entries 65 | 66 | 67 | def flatten_dict(d): 68 | flat_params = dict() 69 | for k, v in d.items(): 70 | if isinstance(v, dict): 71 | v = flatten_dict(v) 72 | for subk, subv in flatten_dict(v).items(): 73 | flat_params[k + "." 
+ subk] = subv 74 | else: 75 | flat_params[k] = v 76 | return flat_params 77 | 78 | 79 | def load_params(params_json_path): 80 | with open(params_json_path, 'r') as f: 81 | data = json.loads(f.read()) 82 | if "args_data" in data: 83 | del data["args_data"] 84 | if "exp_name" not in data: 85 | data["exp_name"] = params_json_path.split("/")[-2] 86 | return data 87 | 88 | 89 | def lookup(d, keys): 90 | if not isinstance(keys, list): 91 | keys = keys.split(".") 92 | for k in keys: 93 | if hasattr(d, "__getitem__"): 94 | if k in d: 95 | d = d[k] 96 | else: 97 | return None 98 | else: 99 | return None 100 | return d 101 | 102 | 103 | def load_exps_data(exp_folder_paths, ignore_missing_keys=False, verbose=True): 104 | if isinstance(exp_folder_paths, str): 105 | exp_folder_paths = [exp_folder_paths] 106 | exps = [] 107 | for exp_folder_path in exp_folder_paths: 108 | exps += [x[0] for x in os.walk(exp_folder_path)] 109 | if verbose: 110 | print("finished walking exp folders") 111 | exps_data = [] 112 | for exp in exps: 113 | try: 114 | exp_path = exp 115 | variant_json_path = os.path.join(exp_path, "variant.json") 116 | progress_json_path = os.path.join(exp_path, "progress.json") 117 | progress = load_progress(progress_json_path, verbose=verbose) 118 | try: 119 | params = load_params(variant_json_path) 120 | except IOError: 121 | params = dict(exp_name="experiment") 122 | exps_data.append(AttrDict( 123 | progress=progress, params=params, flat_params=flatten_dict(params))) 124 | except IOError as e: 125 | if verbose: 126 | print(e) 127 | 128 | # a dictionary of all keys and types of values 129 | all_keys = dict() 130 | for data in exps_data: 131 | for key in data.flat_params.keys(): 132 | if key not in all_keys: 133 | all_keys[key] = type(data.flat_params[key]) 134 | 135 | # if any data does not have some key, specify the value of it 136 | if not ignore_missing_keys: 137 | default_values = dict() 138 | for data in exps_data: 139 | for key in sorted(all_keys.keys()): 140 | if key not in data.flat_params: 141 | if key not in default_values: 142 | default = None 143 | default_values[key] = default 144 | data.flat_params[key] = default_values[key] 145 | 146 | return exps_data 147 | 148 | 149 | def smart_repr(x): 150 | if isinstance(x, tuple): 151 | if len(x) == 0: 152 | return "tuple()" 153 | elif len(x) == 1: 154 | return "(%s,)" % smart_repr(x[0]) 155 | else: 156 | return "(" + ",".join(map(smart_repr, x)) + ")" 157 | else: 158 | if hasattr(x, "__call__"): 159 | return "__import__('pydoc').locate('%s')" % (x.__module__ + "." 
+ x.__name__) 160 | else: 161 | return repr(x) 162 | 163 | 164 | def extract_distinct_params(exps_data, excluded_params=('exp_name', 'seed', 'log_dir'), l=1): 165 | try: 166 | stringified_pairs = sorted( 167 | map( 168 | eval, 169 | unique( 170 | flatten( 171 | [ 172 | list( 173 | map( 174 | smart_repr, 175 | list(d.flat_params.items()) 176 | ) 177 | ) 178 | for d in exps_data 179 | ] 180 | ) 181 | ) 182 | ), 183 | key=lambda x: ( 184 | tuple("" if it is None else str(it) for it in x), 185 | ) 186 | ) 187 | except Exception as e: 188 | print(e) 189 | import ipdb 190 | ipdb.set_trace() 191 | proposals = [(k, [x[1] for x in v]) 192 | for k, v in itertools.groupby(stringified_pairs, lambda x: x[0])] 193 | filtered = [(k, v) for (k, v) in proposals if len(v) > l and all( 194 | [k.find(excluded_param) != 0 for excluded_param in excluded_params])] 195 | return filtered 196 | 197 | 198 | class Selector(object): 199 | def __init__(self, exps_data, filters=None, custom_filters=None): 200 | self._exps_data = exps_data 201 | if filters is None: 202 | self._filters = tuple() 203 | else: 204 | self._filters = tuple(filters) 205 | if custom_filters is None: 206 | self._custom_filters = [] 207 | else: 208 | self._custom_filters = custom_filters 209 | 210 | def where(self, k, v): 211 | return Selector(self._exps_data, self._filters + ((k, v),), self._custom_filters) 212 | 213 | def custom_filter(self, filter): 214 | return Selector(self._exps_data, self._filters, self._custom_filters + [filter]) 215 | 216 | def _check_exp(self, exp): 217 | # or exp.flat_params.get(k, None) is None 218 | return all( 219 | ((str(exp.flat_params.get(k, None)) == str(v) or ( 220 | k not in exp.flat_params)) for k, v in self._filters) 221 | ) and all(custom_filter(exp) for custom_filter in self._custom_filters) 222 | 223 | def extract(self): 224 | return list(filter(self._check_exp, self._exps_data)) 225 | 226 | def iextract(self): 227 | return filter(self._check_exp, self._exps_data) 228 | 229 | 230 | # Taken from plot.ly 231 | color_defaults = [ 232 | '#1f77b4', # muted blue 233 | '#ff7f0e', # safety orange 234 | '#2ca02c', # cooked asparagus green 235 | '#d62728', # brick red 236 | '#9467bd', # muted purple 237 | '#8c564b', # chestnut brown 238 | '#e377c2', # raspberry yogurt pink 239 | '#7f7f7f', # middle gray 240 | '#bcbd22', # curry yellow-green 241 | '#17becf' # blue-teal 242 | ] 243 | 244 | 245 | def hex_to_rgb(hex, opacity=1.0): 246 | if hex[0] == '#': 247 | hex = hex[1:] 248 | assert (len(hex) == 6) 249 | return "rgba({0},{1},{2},{3})".format(int(hex[:2], 16), int(hex[2:4], 16), int(hex[4:6], 16), opacity) 250 | -------------------------------------------------------------------------------- /labs/setup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/setup.pdf -------------------------------------------------------------------------------- /labs/setup/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | viskit_port=$("$DIR/findport.py" 5000 1) 4 | xhost=xhost 5 | if hash nvidia-docker 2>/dev/null; then 6 | docker=nvidia-docker 7 | else 8 | docker=docker 9 | fi 10 | 11 | if [[ $(uname) == 'Darwin' ]]; then 12 | # if xhost not defined, check 13 | if ! hash $xhost 2>/dev/null; then 14 | xhost=/opt/X11/bin/xhost 15 | if [ ! 
-f $xhost ]; then 16 | echo "xhost not found!" 17 | exit 18 | fi 19 | fi 20 | ip=$(ifconfig en0 | grep inet | awk '$1=="inet" {print $2}') 21 | $xhost + $ip >/dev/null 22 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 23 | -e DISPLAY=$ip:0 \ 24 | -v "$DIR":/root/code/bootcamp_pg \ 25 | -ti dementrock/deeprlbootcamp \ 26 | ${1-/bin/bash} "${@:2}" 27 | $xhost - $ip >/dev/null 28 | elif [[ $(uname) == 'Linux' ]]; then 29 | $xhost +local:root >/dev/null 30 | $docker run --rm -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 31 | -e DISPLAY=$DISPLAY \ 32 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 33 | -v "$DIR":/root/code/bootcamp_pg \ 34 | -ti dementrock/deeprlbootcamp \ 35 | ${1-/bin/bash} "${@:2}" 36 | $xhost -local:root >/dev/null 37 | else 38 | echo "This script only supports macOS or Linux" 39 | fi 40 | -------------------------------------------------------------------------------- /labs/setup/docker_run_vnc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | vnc_port=$("$DIR/findport.py" 3000 1) 4 | viskit_port=$("$DIR/findport.py" 5000 1) 5 | 6 | echo "Connect to this VNC address to view the display: localhost:$vnc_port Password: 3284" 7 | docker run --rm -p $vnc_port:5900 -p $viskit_port:$viskit_port -e VISKIT_PORT=$viskit_port \ 8 | -v "$DIR":/root/code/bootcamp_pg \ 9 | -ti dementrock/deeprlbootcamp \ 10 | ./launch_bg_screen_buffer.sh ${1-/bin/bash} "${@:2}" 11 | -------------------------------------------------------------------------------- /labs/setup/environment.yml: -------------------------------------------------------------------------------- 1 | name: deeprlbootcamp 2 | dependencies: 3 | - python==3.5.3 4 | - numpy==1.13.1 5 | - notebook==5.0.0 6 | - pip: 7 | - gym==0.9.2 8 | - chainer==2.0.1 9 | - matplotlib==2.0.2 10 | -------------------------------------------------------------------------------- /labs/setup/findport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Usage: findport.py 3000 100 4 | # 5 | from __future__ import print_function 6 | import socket 7 | from contextlib import closing 8 | import sys 9 | 10 | if len(sys.argv) != 3: 11 | print("Usage: {} <base> <increment>".format(sys.argv[0])) 12 | sys.exit(1) 13 | 14 | base = int(sys.argv[1]) 15 | increment = int(sys.argv[2]) 16 | 17 | 18 | def find_free_port(): 19 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 20 | for port in range(base, 65536, increment): 21 | try: 22 | s.bind(('', port)) 23 | return s.getsockname()[1] 24 | except socket.error: 25 | continue 26 | 27 | 28 | print(find_free_port()) 29 | -------------------------------------------------------------------------------- /labs/setup/launch_bg_screen_buffer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall() { 4 | kill -INT "$xvfb_pid" 5 | kill -INT "$x11vnc_pid" 6 | exit 7 | } 8 | 9 | trap killall SIGINT 10 | trap killall SIGTERM 11 | trap killall SIGKILL 12 | 13 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render +extension RANDR -noreset & export xvfb_pid=$! 14 | 15 | mkdir ~/.x11vnc 16 | x11vnc -storepasswd 3284 ~/.x11vnc/passwd 17 | 18 | command="${1-/bin/bash} ${@:2}" 19 | 20 | env DISPLAY=:99.0 x11vnc -q -nopw -ncache 10 -forever -rfbauth ~/.x11vnc/passwd -display :99 2>/dev/null >/dev/null & export x11vnc_pid="$!"
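# At this point Xvfb is serving an off-screen X display on :99 and x11vnc
# exposes that display over VNC on port 5900 (mapped out of the container by
# docker_run_vnc.sh), protected by the password stored above. The command
# below therefore runs with DISPLAY pointed at the virtual screen, so calls
# like env.render() draw into the buffer and can be watched from a VNC
# client. Hypothetical invocation (arguments are illustrative):
#   ./docker_run_vnc.sh python simplepg/rollout.py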
21 | 22 | DISPLAY=:99 $command 23 | 24 | killall 25 | -------------------------------------------------------------------------------- /labs/setup/scripts/setup_xquartz.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Check if XQuartz is installed 3 | 4 | [ "$(whoami)" != "root" ] && exec sudo -- "$0" "$@" 5 | 6 | app_dir=/Applications/Utilities/XQuartz.app 7 | 8 | if [ -d $app_dir ]; then 9 | # Check installed version 10 | app_version=$(defaults read $app_dir/Contents/Info CFBundleShortVersionString) 11 | if [ $app_version == "2.7.11" ]; then 12 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 13 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 14 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 15 | echo "Already installed. You are all set (if anything's not working, you may want to try logging out and logging back in, and see if that fixes the issue)!" 16 | exit 17 | else 18 | read -r -p "Detected version $app_version but we want 2.7.11. Proceed to install this version? [y/N] " response 19 | case "$response" in 20 | [yY][eE][sS]|[yY]) 21 | ;; 22 | *) 23 | exit 24 | ;; 25 | esac 26 | fi 27 | fi 28 | 29 | url=https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.11.dmg 30 | dmg_path=/tmp/xquartz.dmg 31 | echo "Downloading dmg from $url..." 32 | /usr/bin/curl -L -o $dmg_path $url 33 | echo "Mounting dmg file..." 34 | hdiutil mount $dmg_path 35 | echo "Installing..." 36 | sudo installer -pkg /Volumes/XQuartz-2.7.11/XQuartz.pkg -target / 37 | 38 | defaults write org.macosforge.xquartz.X11 nolisten_tcp -bool false 39 | defaults write org.macosforge.xquartz.X11 no_auth -bool false 40 | defaults write org.macosforge.xquartz.X11 enable_iglx -bool true 41 | 42 | echo "Done! Make sure to log out and then log back in for the changes to take effect." 
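# The three `defaults write` calls above are what make XQuartz usable from
# the Docker containers in this repo: allowing TCP connections and enabling
# indirect GLX rendering let X clients running inside a container render on
# the host display. XQuartz only reads these preferences at startup, hence
# the advice to log out and back in.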
43 | -------------------------------------------------------------------------------- /labs/setup/scripts/test_environment_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def main(): 5 | import roboschool 6 | import gym 7 | import chainer 8 | env = gym.make('CartPole-v0') 9 | env.reset() 10 | env.step(env.action_space.sample()) 11 | env = gym.make('RoboschoolHalfCheetah-v1') 12 | env.reset() 13 | env.step(env.action_space.sample()) 14 | print("Your environment has been successfully set up!") 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /labs/setup/simplepg/__pycache__/point_env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/setup/simplepg/__pycache__/point_env.cpython-35.pyc -------------------------------------------------------------------------------- /labs/setup/simplepg/__pycache__/simple_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/labs/setup/simplepg/__pycache__/simple_utils.cpython-35.pyc -------------------------------------------------------------------------------- /labs/setup/simplepg/point_env.py: -------------------------------------------------------------------------------- 1 | from gym import Env 2 | from gym.envs.registration import register 3 | from gym.utils import seeding 4 | from gym import spaces 5 | from gym.envs.classic_control.cartpole import CartPoleEnv 6 | import numpy as np 7 | 8 | 9 | class PointEnv(Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 50 13 | } 14 | 15 | def __init__(self): 16 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 17 | self.observation_space = spaces.Box(low=-1, high=1, shape=(2,)) 18 | 19 | self._seed() 20 | self.viewer = None 21 | self.state = None 22 | 23 | def _seed(self, seed=None): 24 | self.np_random, seed = seeding.np_random(seed) 25 | return [seed] 26 | 27 | def _step(self, action): 28 | action = np.clip(action, -0.025, 0.025) 29 | self.state = np.clip(self.state + action, -1, 1) 30 | return np.array(self.state), -np.linalg.norm(self.state), False, {} 31 | 32 | def _reset(self): 33 | while True: 34 | self.state = self.np_random.uniform(low=-1, high=1, size=(2,)) 35 | # Sample states that are far away 36 | if np.linalg.norm(self.state) > 0.9: 37 | break 38 | return np.array(self.state) 39 | 40 | # def _render(self, mode='human', close=False): 41 | # pass 42 | 43 | def _render(self, mode='human', close=False): 44 | if close: 45 | if self.viewer is not None: 46 | self.viewer.close() 47 | self.viewer = None 48 | return 49 | 50 | screen_width = 800 51 | screen_height = 800 52 | 53 | if self.viewer is None: 54 | from gym.envs.classic_control import rendering 55 | self.viewer = rendering.Viewer(screen_width, screen_height) 56 | 57 | agent = rendering.make_circle( 58 | min(screen_height, screen_width) * 0.03) 59 | origin = rendering.make_circle( 60 | min(screen_height, screen_width) * 0.03) 61 | trans = rendering.Transform(translation=(0, 0)) 62 | agent.add_attr(trans) 63 | self.trans = trans 64 | agent.set_color(1, 0, 0) 65 | origin.set_color(0, 0, 0) 66 | origin.add_attr(rendering.Transform( 67 | 
translation=(screen_width // 2, screen_height // 2))) 68 | self.viewer.add_geom(agent) 69 | self.viewer.add_geom(origin) 70 | 71 | # self.trans.set_translation(0, 0) 72 | self.trans.set_translation( 73 | (self.state[0] + 1) / 2 * screen_width, 74 | (self.state[1] + 1) / 2 * screen_height, 75 | ) 76 | 77 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 78 | 79 | 80 | register( 81 | 'Point-v0', 82 | entry_point='simplepg.point_env:PointEnv', 83 | timestep_limit=40, 84 | ) 85 | -------------------------------------------------------------------------------- /labs/setup/simplepg/rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import click 3 | import numpy as np 4 | import gym 5 | 6 | from simplepg.simple_utils import include_bias, weighted_sample 7 | 8 | 9 | def point_get_action(theta, ob, rng=np.random): 10 | ob_1 = include_bias(ob) 11 | mean = theta.dot(ob_1) 12 | return rng.normal(loc=mean, scale=1.) 13 | 14 | 15 | def cartpole_get_action(theta, ob, rng=np.random): 16 | ob_1 = include_bias(ob) 17 | logits = ob_1.dot(theta.T) 18 | return weighted_sample(logits, rng=rng) 19 | 20 | 21 | @click.command() 22 | @click.argument("env_id", type=str, default="Point-v0") 23 | def main(env_id): 24 | # Register the environment 25 | rng = np.random.RandomState(42) 26 | 27 | if env_id == 'CartPole-v0': 28 | env = gym.make('CartPole-v0') 29 | get_action = cartpole_get_action 30 | obs_dim = env.observation_space.shape[0] 31 | action_dim = env.action_space.n 32 | elif env_id == 'Point-v0': 33 | from simplepg import point_env 34 | env = gym.make('Point-v0') 35 | get_action = point_get_action 36 | obs_dim = env.observation_space.shape[0] 37 | action_dim = env.action_space.shape[0] 38 | else: 39 | raise ValueError( 40 | "Unsupported environment: must be one of 'CartPole-v0', 'Point-v0'") 41 | 42 | env.seed(42) 43 | 44 | # Initialize parameters 45 | theta = rng.normal(scale=0.01, size=(action_dim, obs_dim + 1)) 46 | 47 | while True: 48 | ob = env.reset() 49 | done = False 50 | # Only render the first trajectory 51 | # Collect a new trajectory 52 | rewards = [] 53 | while not done: 54 | action = get_action(theta, ob, rng=rng) 55 | next_ob, rew, done, _ = env.step(action) 56 | ob = next_ob 57 | env.render() 58 | rewards.append(rew) 59 | 60 | print("Episode reward: %.2f" % np.sum(rewards)) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /labs/setup/simplepg/simple_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.special 3 | import chainer 4 | 5 | 6 | # Compute gradient approximately using finite difference 7 | def numerical_grad(f, x, eps=1e-8): 8 | grad = np.zeros_like(x) 9 | for i in range(len(x)): 10 | xplus = np.array(x) 11 | xplus[i] += eps 12 | fplus = f(xplus) 13 | xminus = np.array(x) 14 | xminus[i] -= eps 15 | fminus = f(xminus) 16 | grad[i] = (fplus - fminus) / (2 * eps) 17 | return grad 18 | 19 | 20 | def gradient_check(f, g, x): 21 | # Test the implementation of g(x) = df/dx 22 | # Perform numerical differentiation and test it 23 | g_num = numerical_grad(f, x) 24 | g_test = g(x) 25 | try: 26 | np.testing.assert_allclose(g_num, g_test, rtol=1e-5) 27 | print("Gradient check passed!") 28 | except AssertionError as e: 29 | print(e) 30 | print("Error: Gradient check didn't pass!") 31 | exit() 32 | 33 | 34 | def log_softmax(logits): 35 | return 
logits - scipy.special.logsumexp(logits, axis=-1, keepdims=True) 36 | 37 | 38 | def softmax(logits): 39 | x = logits 40 | x = x - np.max(x, axis=-1, keepdims=True) 41 | x = np.exp(x) 42 | return x / np.sum(x, axis=-1, keepdims=True) 43 | 44 | 45 | def weighted_sample(logits, rng=np.random): 46 | weights = softmax(logits) 47 | return min( 48 | int(np.sum(rng.uniform() > np.cumsum(weights))), 49 | len(weights) - 1 50 | ) 51 | 52 | 53 | def include_bias(x): 54 | # Add a constant term (1.0) to each entry in x 55 | return np.concatenate([x, np.ones_like(x[..., :1])], axis=-1) 56 | 57 | 58 | _tested = set() 59 | _tests = dict() 60 | 61 | nprs = np.random.RandomState 62 | 63 | 64 | def register_test(fn_name, kwargs, desired_output=None): 65 | assert fn_name not in _tests 66 | _tests[fn_name] = (kwargs, desired_output) 67 | 68 | 69 | def assert_allclose(a, b): 70 | if isinstance(a, (np.ndarray, float, int)): 71 | np.testing.assert_allclose(a, b) 72 | elif isinstance(a, (tuple, list)): 73 | assert isinstance(b, (tuple, list)) 74 | assert len(a) == len(b) 75 | for a_i, b_i in zip(a, b): 76 | assert_allclose(a_i, b_i) 77 | elif isinstance(a, chainer.Variable): 78 | assert isinstance(b, chainer.Variable) 79 | assert_allclose(a.data, b.data) 80 | else: 81 | raise NotImplementedError 82 | 83 | 84 | def test_once(fn): 85 | module = fn.__module__ 86 | name = fn.__name__ 87 | key = module + "." + name 88 | if key in _tested: 89 | return 90 | assert key in _tests, "Test for %s not found!" % key 91 | kwargs, desired_output = _tests[key] 92 | _tested.add(key) 93 | 94 | if callable(kwargs): 95 | kwargs = kwargs() 96 | 97 | if callable(desired_output): 98 | desired_output = desired_output() 99 | 100 | if desired_output is None: 101 | print("Desired output for %s:" % key, repr(fn(**kwargs))) 102 | exit() 103 | else: 104 | try: 105 | output = fn(**kwargs) 106 | assert_allclose(desired_output, output) 107 | print("Test for %s passed!" % key) 108 | except AssertionError as e: 109 | print(e) 110 | print("Error: test for %s didn't pass!" 
% key) 111 | exit() 112 | -------------------------------------------------------------------------------- /slides/FrontiersPieterAbbeelPeterChenRockyDuan.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/FrontiersPieterAbbeelPeterChenRockyDuan.pdf -------------------------------------------------------------------------------- /slides/Lec10aUtilities.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec10aUtilities.pdf -------------------------------------------------------------------------------- /slides/Lec10binverseRL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec10binverseRL.pdf -------------------------------------------------------------------------------- /slides/Lec1intromdpsexactmethods.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec1intromdpsexactmethods.pdf -------------------------------------------------------------------------------- /slides/Lec2samplingbasedapproximationsandfunctionfitting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec2samplingbasedapproximationsandfunctionfitting.pdf -------------------------------------------------------------------------------- /slides/Lec3DQN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec3DQN.pdf -------------------------------------------------------------------------------- /slides/Lec4apolicygradientsactorcritic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec4apolicygradientsactorcritic.pdf -------------------------------------------------------------------------------- /slides/Lec4b_Pong_from_Pixels.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec4b_Pong_from_Pixels.pdf -------------------------------------------------------------------------------- /slides/Lec5advancedpolicygradientmethods.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec5advancedpolicygradientmethods.pdf -------------------------------------------------------------------------------- /slides/Lec6nutsandboltsdeeprlresearch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec6nutsandboltsdeeprlresearch.pdf -------------------------------------------------------------------------------- /slides/Lec7deeprlbootcampsvgscg.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec7deeprlbootcampsvgscg.pdf -------------------------------------------------------------------------------- /slides/Lec8derivativefree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec8derivativefree.pdf -------------------------------------------------------------------------------- /slides/Lec9modelbaseddeeprl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/Lec9modelbaseddeeprl.pdf -------------------------------------------------------------------------------- /slides/TAintros.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aitorzip/deepbootcamp/b4ebcfb613dd5f517f9e6ac245b9c34074378f11/slides/TAintros.pdf --------------------------------------------------------------------------------