├── LICENSE ├── README.md ├── data └── sample_pendulum_data.py └── e2c ├── __init__.py ├── ae.py ├── configs.py ├── datasets.py ├── e2c.py ├── losses.py ├── tf_e2c ├── LICENSE ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── dataset.cpython-35.pyc │ ├── dataset.cpython-36.pyc │ ├── plane_data2.cpython-35.pyc │ └── plane_data2.cpython-36.pyc ├── dataset.py ├── e2c_plane.py ├── e2c_seq.py ├── env0.png ├── env1.png ├── env_blank.png ├── plane_data2.py ├── tests.py ├── vae.py └── viz_results.py └── vae.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Yicheng LUO 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Embed to Control implementation in PyTorch 2 | 3 | Paper can be found here: <http://arxiv.org/abs/1506.07365> 4 | 5 | You will need a patched version of OpenAI Gym in order to generate the 6 | dataset. See 7 | 8 | For the planar task, we use code from Eric Jang's TensorFlow implementation. The source code of that repository 9 | has been modified for our needs and included under `e2c/tf_e2c`. 10 | 11 | ## What's included? 12 | * E2C model, plus VAE and AE baselines, with configurable network architectures for the different setups (see the appendix of the paper).
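For example, generating and loading the pendulum dataset (a minimal sketch mirroring `data/sample_pendulum_data.py`; it assumes the patched Gym mentioned above is installed):

```python
import numpy as np
from e2c.datasets import GymPendulumDatasetV2

np.random.seed(0)

# Render 10000 (x_t, u_t, x_{t+1}) samples to disk, then load them back.
GymPendulumDatasetV2.sample(10000, 'data/pendulum_markov')
dataset = GymPendulumDatasetV2('data/pendulum_markov')
```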
13 | 14 | 15 | ## TODO 16 | * Documentation, tests... (Soon to follow) 17 | -------------------------------------------------------------------------------- /data/sample_pendulum_data.py: -------------------------------------------------------------------------------- 1 | from e2c.datasets import GymPendulumDatasetV2 2 | import numpy as np 3 | 4 | np.random.seed(0) 5 | 6 | GymPendulumDatasetV2.sample(10000, 'data/pendulum_markov') 7 | dataset = GymPendulumDatasetV2('data/pendulum_markov') 8 | -------------------------------------------------------------------------------- /e2c/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/__init__.py -------------------------------------------------------------------------------- /e2c/ae.py: -------------------------------------------------------------------------------- 1 | """ 2 | Autoencoder baseline 3 | """ 4 | 5 | import torch 6 | from torch import nn 7 | from .losses import kl_bernoulli 8 | 9 | 10 | class AE(nn.Module): 11 | def __init__(self, dim_in, dim_z, config='pendulum'): 12 | super(AE, self).__init__() 13 | _, _, dec = load_config(config) 14 | 15 | # TODO: refactor encoder to allow output of dim_z instead of dim_z * 2 16 | self.encoder = nn.Sequential( 17 | nn.Linear(dim_in, 800), 18 | nn.BatchNorm1d(800), 19 | nn.ReLU(), 20 | nn.Linear(800, 800), 21 | nn.BatchNorm1d(800), 22 | nn.ReLU(), 23 | nn.Linear(800, dim_z), 24 | nn.BatchNorm1d(dim_z), 25 | nn.Sigmoid() 26 | ) 27 | 28 | self.decoder = dec(dim_z, dim_in) 29 | 30 | def forward(self, x): 31 | self.z = self.encoder(x) 32 | return self.decoder(self.z) 33 | 34 | 35 | def compute_loss(x_pred, x_true, z_pred, z_true, beta=0.05): 36 | mse = nn.MSELoss() 37 | return mse(x_pred, x_true).add(beta * kl_bernoulli(z_pred, z_true)) 38 | 39 | from .configs import load_config # deferred import; resolved by the time AE is instantiated 40 | -------------------------------------------------------------------------------- /e2c/configs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration for the encoder, decoder, transition 3 | for different tasks. Use load_config to find the proper 4 | set of configurations.
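For example, this is how e2c.E2C wires up a model from a configuration (the
dimension arguments are the caller's choice):

    enc, trans, dec = load_config('pendulum')
    encoder = enc(dim_in, dim_z)
    transition = trans(dim_z, dim_u)
    decoder = dec(dim_z, dim_in)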
5 | """ 6 | import torch 7 | from torch import nn 8 | from torch.autograd import Variable 9 | 10 | 11 | class Encoder(nn.Module): 12 | def __init__(self, enc, dim_in, dim_out): 13 | super(Encoder, self).__init__() 14 | self.m = enc 15 | self.dim_int = dim_in 16 | self.dim_out = dim_out 17 | 18 | def forward(self, x): 19 | return self.m(x).chunk(2, dim=1) 20 | 21 | 22 | class Decoder(nn.Module): 23 | def __init__(self, dec, dim_in, dim_out): 24 | super(Decoder, self).__init__() 25 | self.m = dec 26 | self.dim_in = dim_in 27 | self.dim_out = dim_out 28 | 29 | def forward(self, z): 30 | return self.m(z) 31 | 32 | 33 | class Transition(nn.Module): 34 | def __init__(self, trans, dim_z, dim_u): 35 | super(Transition, self).__init__() 36 | self.trans = trans 37 | self.dim_z = dim_z 38 | self.dim_u = dim_u 39 | 40 | self.fc_B = nn.Linear(dim_z, dim_z * dim_u) 41 | self.fc_o = nn.Linear(dim_z, dim_z) 42 | 43 | def forward(self, h, Q, u): 44 | batch_size = h.size()[0] 45 | v, r = self.trans(h).chunk(2, dim=1) 46 | v1 = v.unsqueeze(2) 47 | rT = r.unsqueeze(1) 48 | I = Variable(torch.eye(self.dim_z).repeat(batch_size, 1, 1)) 49 | if rT.data.is_cuda: 50 | I.dada.cuda() 51 | A = I.add(v1.bmm(rT)) 52 | 53 | B = self.fc_B(h).view(-1, self.dim_z, self.dim_u) 54 | o = self.fc_o(h) 55 | 56 | # need to compute the parameters for distributions 57 | # as well as for the samples 58 | u = u.unsqueeze(2) 59 | 60 | d = A.bmm(Q.mu.unsqueeze(2)).add(B.bmm(u)).add(o).squeeze(2) 61 | sample = A.bmm(h.unsqueeze(2)).add(B.bmm(u)).add(o).squeeze(2) 62 | 63 | return sample, NormalDistribution(d, Q.sigma, Q.logsigma, v=v, r=r) 64 | 65 | 66 | class PlaneEncoder(Encoder): 67 | def __init__(self, dim_in, dim_out): 68 | m = nn.Sequential( 69 | nn.Linear(dim_in, 150), 70 | nn.BatchNorm1d(150), 71 | nn.ReLU(), 72 | nn.Linear(150, 150), 73 | nn.BatchNorm1d(150), 74 | nn.ReLU(), 75 | nn.Linear(150, 150), 76 | nn.BatchNorm1d(150), 77 | nn.ReLU(), 78 | nn.Linear(150, dim_out*2) 79 | ) 80 | super(PlaneEncoder, self).__init__(m, dim_in, dim_out) 81 | 82 | 83 | class PlaneDecoder(Decoder): 84 | def __init__(self, dim_in, dim_out): 85 | m = nn.Sequential( 86 | nn.Linear(dim_in, 200), 87 | nn.BatchNorm1d(200), 88 | nn.ReLU(), 89 | nn.Linear(200, 200), 90 | nn.BatchNorm1d(200), 91 | nn.ReLU(), 92 | nn.Linear(200, dim_out), 93 | nn.BatchNorm1d(dim_out), 94 | nn.Sigmoid() 95 | ) 96 | super(PlaneDecoder, self).__init__(m, dim_in, dim_out) 97 | 98 | 99 | class PlaneTransition(Transition): 100 | def __init__(self, dim_z, dim_u): 101 | trans = nn.Sequential( 102 | nn.Linear(dim_z, 100), 103 | nn.BatchNorm1d(100), 104 | nn.ReLU(), 105 | nn.Linear(100, 100), 106 | nn.BatchNorm1d(100), 107 | nn.ReLU(), 108 | nn.Linear(100, dim_z*2) 109 | ) 110 | super(PlaneTransition, self).__init__(trans, dim_z, dim_u) 111 | 112 | 113 | class PendulumEncoder(Encoder): 114 | def __init__(self, dim_in, dim_out): 115 | m = nn.ModuleList([ 116 | torch.nn.Linear(dim_in, 800), 117 | nn.BatchNorm1d(800), 118 | nn.ReLU(), 119 | torch.nn.Linear(800, 800), 120 | nn.BatchNorm1d(800), 121 | nn.ReLU(), 122 | nn.Linear(800, 2 * dim_out) 123 | ]) 124 | super(PendulumEncoder, self).__init__(m, dim_in, dim_out) 125 | 126 | def forward(self, x): 127 | for l in self.m: 128 | x = l(x) 129 | return x.chunk(2, dim=1) 130 | 131 | 132 | class PendulumDecoder(Decoder): 133 | def __init__(self, dim_in, dim_out): 134 | m = nn.ModuleList([ 135 | torch.nn.Linear(dim_in, 800), 136 | nn.BatchNorm1d(800), 137 | nn.ReLU(), 138 | torch.nn.Linear(800, 800), 139 | nn.BatchNorm1d(800), 140 | 
nn.ReLU(), 141 | nn.Linear(800, dim_out), 142 | nn.Sigmoid() 143 | ]) 144 | super(PendulumDecoder, self).__init__(m, dim_in, dim_out) 145 | 146 | def forward(self, z): 147 | for l in self.m: 148 | z = l(z) 149 | return z 150 | 151 | 152 | class PendulumTransition(Transition): 153 | def __init__(self, dim_z, dim_u): 154 | trans = nn.Sequential( 155 | nn.Linear(dim_z, 100), 156 | nn.BatchNorm1d(100), 157 | nn.ReLU(), 158 | nn.Linear(100, 100), 159 | nn.BatchNorm1d(100), 160 | nn.ReLU(), 161 | nn.Linear(100, dim_z * 2), 162 | nn.BatchNorm1d(dim_z * 2), 163 | nn.Sigmoid() # Added to prevent nan 164 | ) 165 | super(PendulumTransition, self).__init__(trans, dim_z, dim_u) 166 | 167 | 168 | _CONFIG_MAP = { 169 | 'plane': (PlaneEncoder, PlaneTransition, PlaneDecoder), 170 | 'pendulum': (PendulumEncoder, PendulumTransition, PendulumDecoder) 171 | } 172 | 173 | 174 | def load_config(name): 175 | """Load a particular configuration. 176 | Returns: 177 | (encoder, transition, decoder): a tuple containing class constructors 178 | """ 179 | if name not in _CONFIG_MAP.keys(): 180 | raise ValueError("Unknown config: %s" % name) 181 | return _CONFIG_MAP[name] 182 | 183 | from .e2c import NormalDistribution # deferred to avoid a circular import with .e2c 184 | 185 | __all__ = ['load_config'] 186 | -------------------------------------------------------------------------------- /e2c/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from os import path 4 | 5 | from PIL import Image 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import gym 10 | import json 11 | from datetime import datetime 12 | from torchvision.transforms import ToTensor 13 | from torch.utils.data import Dataset 14 | from .tf_e2c.plane_data2 import T, num_t 15 | from skimage.transform import resize 16 | from skimage.color import rgb2gray 17 | from tqdm import trange, tqdm 18 | import pickle 19 | 20 | 21 | class PendulumData(Dataset): 22 | def __init__(self, root, split): 23 | if split not in ['train', 'test', 'all']: 24 | raise ValueError 25 | 26 | dir = os.path.join(root, split) 27 | filenames = glob.glob(os.path.join(dir, '*.png')) 28 | 29 | if split == 'all': 30 | filenames = glob.glob(os.path.join(root, 'train/*.png')) 31 | filenames.extend(glob.glob(os.path.join(root, 'test/*.png'))) 32 | 33 | filenames = sorted( 34 | filenames, key=lambda x: int(os.path.basename(x).split('.')[0])) 35 | 36 | images = [] 37 | 38 | for f in filenames: 39 | img = plt.imread(f) 40 | img[img != 1] = 0 41 | images.append(resize(rgb2gray(img), [48, 48], mode='constant')) 42 | 43 | self.images = np.array(images, dtype=np.float32) 44 | self.images = self.images.reshape([len(images), 48, 48, 1]) 45 | 46 | action_filename = os.path.join(root, 'actions.txt') 47 | 48 | with open(action_filename) as infile: 49 | actions = np.array([float(l) for l in infile.readlines()]) 50 | 51 | self.actions = actions[:len(self.images)].astype(np.float32) 52 | self.actions = self.actions.reshape(len(actions), 1) 53 | 54 | def __len__(self): 55 | return len(self.actions) - 1 56 | 57 | def __getitem__(self, index): 58 | return self.images[index], self.actions[index], self.images[index] 59 | 60 | 61 | class PlaneDataset(Dataset): 62 | def __init__(self, planedata): 63 | self.planedata = planedata 64 | 65 | def __len__(self): 66 | return T * num_t # Total number of samples 67 | 68 | def __getitem__(self, index): 69 | index = np.random.randint(0, num_t) # Sample any one of them 70 | t = np.random.randint(0, T - 1) 71 | x = np.array(self.planedata.getX(index, t)) 72 | x_next = np.array(self.planedata.getX(index, t + 1)) 73 | u = np.copy(self.planedata.U[index, t, :]) 74 | return x, u, x_next 75 | 76 |
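# The datasets in this module yield (x, u, x_next) triples from __getitem__,
# so they plug straight into a PyTorch DataLoader. A minimal sketch (the
# `planedata` object comes from e2c/tf_e2c/plane_data2.py; constructing it
# here is an assumption, not part of this file):
#
#   from torch.utils.data import DataLoader
#   loader = DataLoader(PlaneDataset(planedata), batch_size=128, shuffle=True)
#   for x, u, x_next in loader:
#       ...  # one training batch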
77 | class GymPendulumDataset(Dataset): 78 | """Dataset definition for the Gym Pendulum task""" 79 | width = 40 80 | height = 40 81 | action_dim = 1 82 | # Sampling uses the OpenAI Gym environment; requires a patched version of gym. 83 | 84 | def __init__(self, filename): 85 | _data = np.load(filename) 86 | self.X0 = np.copy(_data['X0']) # Copy to memory, otherwise it's slow. 87 | self.X1 = np.copy(_data['X1']) 88 | self.U = np.copy(_data['U']) 89 | _data.close() 90 | 91 | def __len__(self): 92 | return len(self.X0) 93 | 94 | def __getitem__(self, index): 95 | return self.X0[index], self.U[index], self.X1[index] 96 | 97 | @classmethod 98 | def all_states(cls): 99 | _env = gym.make('Pendulum-v0').env 100 | width = GymPendulumDataset.width 101 | height = GymPendulumDataset.height 102 | X = np.zeros((360, width, height)) 103 | 104 | for i in range(360): 105 | th = i / 360. * 2 * np.pi 106 | state = _env.render_state(th) 107 | X[i, :, :] = resize(rgb2gray(state), (width, height), mode='reflect') 108 | _env.close() 109 | _env.viewer.close() 110 | return X 111 | 112 | @classmethod 113 | def sample_trajectories(cls, sample_size, step_size=1, apply_control=True): 114 | _env = gym.make('Pendulum-v0').env 115 | X0 = np.zeros((sample_size, 500, 500, 3), dtype=np.uint8) 116 | U = np.zeros((sample_size, 1), dtype=np.float32) 117 | X1 = np.zeros((sample_size, 500, 500, 3), dtype=np.uint8) 118 | for i in range(sample_size): 119 | th = np.random.uniform(0, np.pi * 2) 120 | # thdot = np.random.uniform(-8, 8) 121 | thdot = 0 122 | state = np.array([th, thdot]) 123 | initial = state 124 | # apply the same control over a few timesteps 125 | if apply_control: 126 | u = np.random.uniform(-2, 2, size=(1,)) 127 | else: 128 | u = np.zeros((1,)) 129 | for _ in range(step_size): 130 | state = _env.step_from_state(state, u) 131 | 132 | X0[i, :, :, :] = _env.render_state(initial[0]) 133 | U[i, :] = u 134 | X1[i, :, :, :] = _env.render_state(state[0]) 135 | _env.viewer.close() 136 | return X0, U, X1 137 | 138 | 139 | class GymPendulumDatasetV2(Dataset): 140 | width = 40 * 2 141 | height = 40 142 | action_dim = 1 143 | 144 | def __init__(self, dir): 145 | self.dir = dir 146 | with open(path.join(dir, 'data.json')) as f: 147 | self._data = json.load(f) 148 | self._process() 149 | 150 | def __len__(self): 151 | return len(self._data['samples']) 152 | 153 | def __getitem__(self, index): 154 | return self._processed[index] 155 | 156 | @staticmethod 157 | def _process_image(img): 158 | return ToTensor()((img.convert('L').
159 | resize((GymPendulumDatasetV2.width, 160 | GymPendulumDatasetV2.height)))) 161 | 162 | def _process(self): 163 | preprocessed_file = os.path.join(self.dir, 'processed.pkl') 164 | if not os.path.exists(preprocessed_file): 165 | processed = [] 166 | for sample in tqdm(self._data['samples'], desc='processing data'): 167 | before = Image.open(os.path.join(self.dir, sample['before'])) 168 | after = Image.open(os.path.join(self.dir, sample['after'])) 169 | 170 | processed.append((self._process_image(before), 171 | np.array(sample['control']), 172 | self._process_image(after))) 173 | 174 | with open(preprocessed_file, 'wb') as f: 175 | pickle.dump(processed, f) 176 | self._processed = processed 177 | else: 178 | with open(preprocessed_file, 'rb') as f: 179 | self._processed = pickle.load(f) 180 | 181 | @staticmethod 182 | def _render_state_fully_observed(env, state): 183 | before1 = state 184 | before2 = env.step_from_state(state, np.array([0])) 185 | return map(env.render_state, [before1[0], before2[0]]) 186 | 187 | @classmethod 188 | def sample(cls, sample_size, output_dir, step_size=1, 189 | apply_control=True, num_shards=10): 190 | env = gym.make('Pendulum-v0').env 191 | assert sample_size % num_shards == 0 192 | 193 | samples = [] 194 | 195 | if not path.exists(output_dir): 196 | os.makedirs(output_dir) 197 | 198 | for i in trange(sample_size): 199 | th = np.random.uniform(0, np.pi * 2) 200 | thdot = np.random.uniform(-8, 8) 201 | 202 | state = np.array([th, thdot]) 203 | u0 = np.array([0]) 204 | 205 | initial_state = state 206 | before1, before2 = GymPendulumDatasetV2._render_state_fully_observed(env, state) 207 | 208 | # apply the same control over a few timesteps 209 | if apply_control: 210 | u = np.random.uniform(-2, 2, size=(1,)) 211 | else: 212 | u = np.zeros((1,)) 213 | 214 | # state = env.step_from_state(state, u0) 215 | for _ in range(step_size): 216 | state = env.step_from_state(state, u) 217 | 218 | after_state = state 219 | after1, after2 = GymPendulumDatasetV2._render_state_fully_observed(env, state) 220 | 221 | before = np.hstack((before1, before2)) 222 | after = np.hstack((after1, after2)) 223 | 224 | shard_no = i // (sample_size // num_shards) 225 | 226 | shard_path = path.join('{:03d}-of-{:03d}'.format(shard_no, num_shards)) 227 | 228 | if not path.exists(path.join(output_dir, shard_path)): 229 | os.makedirs(path.join(output_dir, shard_path)) 230 | 231 | before_file = path.join(shard_path, 'before-{:05d}.jpg'.format(i)) 232 | plt.imsave(path.join(output_dir, before_file), before) 233 | 234 | after_file = path.join(shard_path, 'after-{:05d}.jpg'.format(i)) 235 | plt.imsave(path.join(output_dir, after_file), after) 236 | 237 | samples.append({ 238 | 'before_state': initial_state.tolist(), 239 | 'after_state': after_state.tolist(), 240 | 'before': before_file, 241 | 'after': after_file, 242 | 'control': u.tolist(), 243 | }) 244 | 245 | with open(path.join(output_dir, 'data.json'), 'wt') as outfile: 246 | json.dump( 247 | { 248 | 'metadata': { 249 | 'num_samples': sample_size, 250 | 'step_size': step_size, 251 | 'apply_control': apply_control, 252 | 'time_created': str(datetime.now()), 253 | 'version': 1 254 | }, 255 | 'samples': samples 256 | }, outfile, indent=2) 257 | 258 | env.viewer.close() 259 | -------------------------------------------------------------------------------- /e2c/e2c.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Variable 4 | 5 | from .losses 
import binary_crossentropy 6 | 7 | 8 | class NormalDistribution(object): 9 | """ 10 | Wrapper class representing a multivariate normal distribution parameterized by 11 | N(mu,Cov). If cov. matrix is diagonal, Cov=(sigma).^2. Otherwise, 12 | Cov=A*(sigma).^2*A', where A = (I+v*r^T). 13 | """ 14 | 15 | def __init__(self, mu, sigma, logsigma, *, v=None, r=None): 16 | self.mu = mu 17 | self.sigma = sigma 18 | self.logsigma = logsigma 19 | self.v = v 20 | self.r = r 21 | 22 | @property 23 | def cov(self): 24 | """This should only be called when NormalDistribution represents one sample""" 25 | if self.v is not None and self.r is not None: 26 | assert self.v.dim() == 1 27 | dim = self.v.size(0) # D, the dimensionality of the distribution 28 | v = self.v.unsqueeze(1) # D * 1 vector 29 | rt = self.r.unsqueeze(0) # 1 * D vector 30 | A = torch.eye(dim) + v.mm(rt) 31 | return A.mm(torch.diag(self.sigma.pow(2)).mm(A.t())) 32 | else: 33 | return torch.diag(self.sigma.pow(2)) 34 | 35 | 36 | def KLDGaussian(Q, N, eps=1e-8): 37 | r"""KL Divergence between two Gaussians 38 | Assuming Q ~ N(mu0, A\sigma_0A') where A = I + vr^{T} 39 | and N ~ N(mu1, \sigma_1) 40 | """ 41 | sum = lambda x: torch.sum(x, dim=1) 42 | k = float(Q.mu.size()[1]) # dimension of distribution 43 | mu0, v, r, mu1 = Q.mu, Q.v, Q.r, N.mu 44 | s02, s12 = (Q.sigma).pow(2) + eps, (N.sigma).pow(2) + eps 45 | a = sum(s02 * (1. + 2. * v * r) / s12) + sum(v.pow(2) / s12) * sum(r.pow(2) * s02) # trace term 46 | b = sum((mu1 - mu0).pow(2) / s12) # difference-of-means term 47 | c = 2. * (sum(N.logsigma - Q.logsigma) - torch.log(1. + sum(v * r) + eps)) # ratio-of-determinants term. 48 | 49 | # 50 | # print('trace: %s' % a) 51 | # print('mu_diff: %s' % b) 52 | # print('k: %s' % k) 53 | # print('det: %s' % c) 54 | 55 | return 0.5 * (a + b - k + c) 56 | 57 | 58 | class E2C(nn.Module): 59 | def __init__(self, dim_in, dim_z, dim_u, config='pendulum'): 60 | super(E2C, self).__init__() 61 | enc, trans, dec = load_config(config) 62 | self.encoder = enc(dim_in, dim_z) 63 | 64 | self.decoder = dec(dim_z, dim_in) 65 | self.trans = trans(dim_z, dim_u) 66 | 67 | def encode(self, x): 68 | return self.encoder(x) 69 | 70 | def decode(self, z): 71 | return self.decoder(z) 72 | 73 | def transition(self, z, Qz, u): 74 | return self.trans(z, Qz, u) 75 | 76 | def reparam(self, mean, logvar): 77 | std = logvar.mul(0.5).exp_() 78 | self.z_mean = mean 79 | self.z_sigma = std 80 | eps = torch.FloatTensor(std.size()).normal_() 81 | if std.data.is_cuda: 82 | eps = eps.cuda() 83 | eps = Variable(eps) 84 | return eps.mul(std).add_(mean), NormalDistribution(mean, std, torch.log(std)) 85 | 86 | def forward(self, x, action, x_next): 87 | mean, logvar = self.encode(x) 88 | mean_next, logvar_next = self.encode(x_next) 89 | 90 | z, self.Qz = self.reparam(mean, logvar) 91 | z_next, self.Qz_next = self.reparam(mean_next, logvar_next) 92 | 93 | self.x_dec = self.decode(z) 94 | self.x_next_dec = self.decode(z_next) 95 | 96 | self.z_next_pred, self.Qz_next_pred = self.transition(z, self.Qz, action) 97 | self.x_next_pred_dec = self.decode(self.z_next_pred) 98 | 99 | return self.x_next_pred_dec 100 | 101 | def latent_embeddings(self, x): 102 | return self.encode(x)[0] 103 | 104 | def predict(self, X, U): 105 | mean, logvar = self.encode(X) 106 | z, Qz = self.reparam(mean, logvar) 107 | z_next_pred, Qz_next_pred = self.transition(z, Qz, U) 108 | return self.decode(z_next_pred) 109 | 110 |
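# How the pieces fit together in one training step (a sketch, not code from
# this repo; the optimizer, the data batches and the lambd weight are
# assumptions here):
#
#   model = E2C(dim_in, dim_z, dim_u, config='pendulum')
#   x_next_pred = model(x, u, x_next)  # also populates model.x_dec, model.Qz, ...
#   bound, kl = compute_loss(model.x_dec, model.x_next_pred_dec, x, x_next,
#                            model.Qz, model.Qz_next_pred, model.Qz_next)
#   loss = bound + lambd * kl          # compute_loss is defined just below
#   loss.backward()
#   optimizer.step()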
111 | def compute_loss(x_dec, x_next_pred_dec, x, x_next, 112 | Qz, Qz_next_pred, 113 | Qz_next): 114 | # Reconstruction losses 115 | if False: # disabled MSE alternative to the cross-entropy reconstruction below 116 | x_reconst_loss = (x_dec - x).pow(2).sum(dim=1) 117 | x_next_reconst_loss = (x_next_pred_dec - x_next).pow(2).sum(dim=1) 118 | else: 119 | x_reconst_loss = -binary_crossentropy(x, x_dec).sum(dim=1) 120 | x_next_reconst_loss = -binary_crossentropy(x_next, x_next_pred_dec).sum(dim=1) 121 | 122 | logvar = Qz.logsigma.mul(2) 123 | KLD_element = Qz.mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar) 124 | KLD = torch.sum(KLD_element, dim=1).mul(-0.5) 125 | 126 | # ELBO 127 | bound_loss = x_reconst_loss.add(x_next_reconst_loss).add(KLD) 128 | kl = KLDGaussian(Qz_next_pred, Qz_next) 129 | return bound_loss.mean(), kl.mean() 130 | 131 | from .configs import load_config # deferred to avoid a circular import with .configs 132 | -------------------------------------------------------------------------------- /e2c/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def binary_crossentropy(t, o, eps=1e-8): 5 | return t * torch.log(o + eps) + (1.0 - t) * torch.log(1.0 - o + eps) 6 | 7 | 8 | def kl_bernoulli(p, q, eps=1e-8): 9 | # http://ufldl.stanford.edu/tutorial/unsupervised/Autoencoders/ 10 | kl = p * torch.log((p + eps) / (q + eps)) + \ 11 | (1 - p) * torch.log((1 - p + eps) / (1 - q + eps)) 12 | return kl.mean() 13 | -------------------------------------------------------------------------------- /e2c/tf_e2c/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship.
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {2017} {Eric Jang} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /e2c/tf_e2c/README.md: -------------------------------------------------------------------------------- 1 | # e2c 2 | 3 | TensorFlow implementation of: [Embed to Control: A Locally Linear Latent Dynamics Model for Control from Raw Images](http://arxiv.org/abs/1506.07365), with code optimized for clarity and simplicity. 4 | 5 | ![latent](http://i.imgur.com/zO5G3K0.png) 6 | 7 | Only 160 lines of code, and it uses only Python modules that come installed with TensorFlow.
A proper writeup explaining the paper, plus improved model code, will follow soon. 8 | 9 | ## Results 10 | 11 | The left column shows x_t and x_{t+1}; the right column shows the E2C reconstructions. 12 | ![reconstruction](https://1.bp.blogspot.com/-L2qTQr8XZMY/Vv3cgLAklqI/AAAAAAAAE8g/rjMk2Z98XxEalKyXvtZUGeHtArdsD2vBg/s640/figure_1.png) 13 | 14 | Larger step sizes (magnitude of u) yield better latent space reconstruction... 15 | 16 | ![unfolding latent space](http://i.imgur.com/DF6Gd96.gif) 17 | 18 | but degrade image reconstruction fidelity (more on this later...). Here's a different set of obstacles: 19 | 20 | ![poor reconstruction](http://i.imgur.com/cl9RjlR.png) 21 | 22 | ## Features: 23 | - Implements the standard E2C model with the factorized Gaussian KL divergence term (Eq. 14) 24 | - Adam Optimizer + Orthogonal weight initialization scheme by [Saxe et al.](http://arxiv.org/abs/1312.6120). 25 | - Learns the latent space of the planar control task (uses the same parameters described in the paper, Appendix B.6.2) 26 | 27 | ## Training the Model 28 | 29 | First, generate the synthetic training data `plane2.npz` by running the following script: 30 | 31 | ```bash 32 | $ python plane_data2.py 33 | ``` 34 | 35 | Then, train the model: 36 | ```bash 37 | $ python e2c_plane.py 38 | ``` 39 | 40 | You can then generate visualizations by executing: 41 | 42 | ```bash 43 | $ python viz_results.py 44 | ``` 45 | 46 | ## Acknowledgements 47 | 48 | Thanks to Manuel Watter for answering my questions about the paper. 49 | 50 | ## License 51 | 52 | Apache 2.0 53 | -------------------------------------------------------------------------------- /e2c/tf_e2c/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__init__.py -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/dataset.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/dataset.cpython-35.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/plane_data2.cpython-35.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/plane_data2.cpython-35.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/plane_data2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/plane_data2.cpython-36.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/dataset.py: -------------------------------------------------------------------------------- 1 | # abstract helper class customized for training E2C tasks 2 | 3 | 4 | class DataSet(object): 5 | """docstring for DataSet""" 6 | 7 | def __init__(self): 8 | super(DataSet, self).__init__() 9 | -------------------------------------------------------------------------------- /e2c/tf_e2c/e2c_plane.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Implementation of Embed-to-Control model: http://arxiv.org/abs/1506.07365 4 | Code is organized for simplicity and readability w.r.t paper. 5 | 6 | Author: Eric Jang 7 | """ 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | import matplotlib.pyplot as plt 12 | import os 13 | from data.plane_data2 import PlaneData, get_params 14 | 15 | import ipdb as pdb 16 | # np.random.seed(0) 17 | tf.set_random_seed(0) 18 | 19 | A = B = 40 20 | 21 | x_dim, u_dim, T = get_params() 22 | z_dim = 2 # latent space dimensionality 23 | eps = 1e-9 # numerical stability 24 | 25 | 26 | def orthogonal_initializer(scale=1.1): 27 | """ 28 | From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120 29 | """ 30 | 31 | def _initializer(shape, dtype=tf.float32): 32 | flat_shape = (shape[0], np.prod(shape[1:])) 33 | a = np.random.normal(0.0, 1.0, flat_shape) 34 | u, _, v = np.linalg.svd(a, full_matrices=False) 35 | # pick the one with the correct shape 36 | q = u if u.shape == flat_shape else v 37 | q = q.reshape(shape) 38 | print( 39 | 'Warning -- You have opted to use the orthogonal_initializer function' 40 | ) 41 | return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32) 42 | 43 | return _initializer 44 | 45 | 46 | class NormalDistribution(object): 47 | """ 48 | Represents a multivariate normal distribution parameterized by 49 | N(mu,Cov). If cov. matrix is diagonal, Cov=(sigma).^2. Otherwise, 50 | Cov=A*(sigma).^2*A', where A = (I+v*r^T). 
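    Equivalently, the full covariance is (I + v*r^T) * diag(sigma^2) * (I + v*r^T)^T,
    so the default v = r = 0 set in __init__ recovers the diagonal case.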
51 | """ 52 | 53 | def __init__(self, mu, sigma, logsigma, v=None, r=None): 54 | self.mu = mu 55 | self.sigma = sigma # either stdev diagonal itself, or stdev diagonal from decomposition 56 | self.logsigma = logsigma 57 | dim = mu.get_shape() 58 | if v is None: 59 | v = tf.constant(0., shape=dim) 60 | if r is None: 61 | r = tf.constant(0., shape=dim) 62 | self.v = v 63 | self.r = r 64 | 65 | 66 | def linear(x, output_dim): 67 | w = tf.get_variable( 68 | "w", [x.get_shape()[1], output_dim], 69 | initializer=orthogonal_initializer(1.1)) 70 | b = tf.get_variable( 71 | "b", [output_dim], initializer=tf.constant_initializer(0.0)) 72 | return tf.matmul(x, w) + b 73 | 74 | 75 | def ReLU(x, output_dim, scope): 76 | # helper function for implementing stacked ReLU layers 77 | with tf.variable_scope(scope): 78 | return tf.nn.relu(linear(x, output_dim)) 79 | 80 | 81 | def encode(x, share=None): 82 | with tf.variable_scope("encoder", reuse=share): 83 | for l in range(3): 84 | x = ReLU(x, 150, "aggregate_loss" + str(l)) 85 | return linear(x, 2 * z_dim) 86 | 87 | 88 | def KLGaussian(Q, N): 89 | # Q, N are instances of NormalDistribution 90 | # implements KL Divergence term KL(N0,N1) derived in Appendix A.1 91 | # Q ~ Normal(mu,A*sigma*A^T), N ~ Normal(mu,sigma_1) 92 | # returns scalar divergence, measured in nats (information units under log rather than log2), shape= batch x 1 93 | sum = lambda x: tf.reduce_sum(x, 1) # convenience fn for summing over features (columns) 94 | k = float(Q.mu.get_shape()[1].value) # dimension of distribution 95 | mu0, v, r, mu1 = Q.mu, Q.v, Q.r, N.mu 96 | s02, s12 = tf.square(Q.sigma), tf.square(N.sigma) + eps 97 | #vr=sum(v*r) 98 | a = sum(s02 * (1. + 2. * v * r) / s12) + sum(tf.square(v) / s12) * sum( 99 | tf.square(r) * s02) # trace term 100 | b = sum(tf.square(mu1 - mu0) / s12) # difference-of-means term 101 | c = 2. * (sum(N.logsigma - Q.logsigma) - tf.log(1. + sum(v * r)) 102 | ) # ratio-of-determinants term. 103 | return 0.5 * (a + b - k + c) #, a, b, c 104 | 105 | 106 | def sampleNormal(mu, sigma): 107 | # diagonal stdev 108 | n01 = tf.random_normal(sigma.get_shape(), mean=0, stddev=1) 109 | return mu + sigma * n01 110 | 111 | 112 | def sampleQ_phi(h_enc, share=None): 113 | with tf.variable_scope("sampleQ_phi", reuse=share): 114 | mu, log_sigma = tf.split(1, 2, linear( 115 | h_enc, z_dim * 2)) # diagonal stdev values 116 | sigma = tf.exp(log_sigma) 117 | return sampleNormal(mu, sigma), NormalDistribution( 118 | mu, sigma, log_sigma) 119 | 120 | 121 | def transition(h): 122 | # compute A,B,o linearization matrices 123 | with tf.variable_scope("trans"): 124 | for l in range(2): 125 | h = ReLU(h, 100, "aggregate_loss" + str(l)) 126 | with tf.variable_scope("A"): 127 | v, r = tf.split(1, 2, linear(h, z_dim * 2)) 128 | v1 = tf.expand_dims(v, -1) # (batch, z_dim, 1) 129 | rT = tf.expand_dims(r, 1) # batch, 1, z_dim 130 | I = tf.diag([1.] 
* z_dim) 131 | A = ( 132 | I + tf.batch_matmul(v1, rT) 133 | ) # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted) 134 | with tf.variable_scope("B"): 135 | B = linear(h, z_dim * u_dim) 136 | B = tf.reshape(B, [-1, z_dim, u_dim]) 137 | with tf.variable_scope("o"): 138 | o = linear(h, z_dim) 139 | return A, B, o, v, r 140 | 141 | 142 | def sampleQ_psi(z, u, Q_phi): 143 | A, B, o, v, r = transition(z) 144 | with tf.variable_scope("sampleQ_psi"): 145 | mu_t = tf.expand_dims(Q_phi.mu, -1) # batch,z_dim,1 146 | Amu = tf.squeeze(tf.batch_matmul(A, mu_t), [-1]) 147 | u = tf.expand_dims(u, -1) # batch,u_dim,1 148 | Bu = tf.squeeze(tf.batch_matmul(B, u), [-1]) 149 | Q_psi = NormalDistribution(Amu + Bu + o, Q_phi.sigma, Q_phi.logsigma, 150 | v, r) 151 | # the actual z_next sample is generated by deterministically transforming z_t 152 | z = tf.expand_dims(z, -1) 153 | Az = tf.squeeze(tf.batch_matmul(A, z), [-1]) 154 | z_next = Az + Bu + o 155 | return z_next, Q_psi #,(A,B,o,v,r) # debugging 156 | 157 | 158 | def decode(z, share=None): 159 | with tf.variable_scope("decoder", reuse=share): 160 | for l in range(2): 161 | z = ReLU(z, 200, "aggregate_loss" + str(l)) 162 | return linear(z, x_dim) 163 | 164 | 165 | def binary_crossentropy(t, o): 166 | return t * tf.log(o + eps) + (1.0 - t) * tf.log(1.0 - o + eps) 167 | 168 | 169 | def recons_loss(x, x_recons): 170 | with tf.variable_scope("Lx"): 171 | ll = tf.reduce_sum(binary_crossentropy(x, x_recons), 172 | 1) # sum across features 173 | return -ll # negative log-likelihood 174 | 175 | 176 | def latent_loss(Q): 177 | with tf.variable_scope("Lz"): 178 | mu2 = tf.square(Q.mu) 179 | sigma2 = tf.square(Q.sigma) 180 | # negative of the upper bound of posterior 181 | return -0.5 * tf.reduce_sum(1 + 2 * Q.logsigma - mu2 - sigma2, 1) 182 | 183 | 184 | def sampleP_theta(h_dec, share=None): 185 | # sample x from bernoulli distribution with means p=W(h_dec) 186 | with tf.variable_scope("P_theta", reuse=share): 187 | p = linear(h_dec, x_dim) 188 | return tf.sigmoid(p) # mean of bernoulli distribution 189 | 190 | 191 | # BUILD NETWORK 192 | batch_size = 128 193 | 194 | x = tf.placeholder(tf.float32, [batch_size, x_dim]) 195 | u = tf.placeholder(tf.float32, [batch_size, u_dim]) # control at time t 196 | x_next = tf.placeholder(tf.float32, [batch_size, 197 | x_dim]) # observation at time t+1 198 | 199 | # encode x_t 200 | h_enc = encode(x) 201 | z, Q_phi = sampleQ_phi(h_enc) 202 | # reconstitute x_t 203 | h_dec = decode(z) 204 | x_recons = sampleP_theta(h_dec) 205 | # compute linearized dynamics, predict new latent state 206 | z_predict, Q_psi = sampleQ_psi(z, u, Q_phi) 207 | # decode prediction 208 | h_dec_predict = decode(z_predict, share=True) 209 | x_predict = sampleP_theta(h_dec_predict, share=True) 210 | # encode next 211 | h_enc_next = encode(x_next, share=True) 212 | z_next, Q_phi_next = sampleQ_phi(h_enc_next, share=True) 213 | 214 | with tf.variable_scope("Loss"): 215 | L_x = recons_loss(x, x_recons) 216 | L_x_next = recons_loss(x_next, x_predict) 217 | L_z = latent_loss(Q_phi) 218 | L_bound = L_x + L_x_next + L_z 219 | KL = KLGaussian(Q_psi, Q_phi_next) 220 | lambd = 0.25 221 | loss = tf.reduce_mean( 222 | L_bound + lambd * KL) # average loss over minibatch to single scalar 223 | 224 | for v in tf.all_variables(): 225 | print("%s : %s" % (v.name, v.get_shape())) 226 | 227 | # pdb.set_trace() # debugging hook; uncomment to inspect variables before training 228 | 229 | with tf.variable_scope("Optimizer"): 230 | learning_rate = 1e-4 231 | optimizer = tf.train.AdamOptimizer( 232 | learning_rate, beta1=0.1,
beta2=0.1) # beta2=0.1 233 | train_op = optimizer.minimize(loss) 234 | 235 | saver = tf.train.Saver(max_to_keep=200) # keep up to 200 checkpoint files 236 | 237 | ckpt_file = "/ltmp/e2c-plane" 238 | 239 | # summaries 240 | tf.scalar_summary("loss", loss) 241 | tf.scalar_summary("L_x", tf.reduce_mean(L_x)) 242 | tf.scalar_summary("L_x_next", tf.reduce_mean(L_x_next)) 243 | tf.scalar_summary("L_z", tf.reduce_mean(L_z)) 244 | tf.scalar_summary("KL", tf.reduce_mean(KL)) 245 | all_summaries = tf.merge_all_summaries() 246 | 247 | # TRAIN 248 | if __name__ == "__main__": 249 | init = tf.initialize_all_variables() 250 | sess = tf.InteractiveSession() 251 | sess.run(init) 252 | # WRITER 253 | writer = tf.train.SummaryWriter("/ltmp/e2c", sess.graph_def) 254 | 255 | dataset = PlaneData("data/plane1.npz", "data/env1.png") 256 | dataset.initialize() 257 | 258 | # tmp 259 | # (x_val,u_val,x_next_val)=dataset.sample(batch_size, replace=False) 260 | # feed_dict={ 261 | # x:x_val, 262 | # u:u_val, 263 | # x_next:x_next_val 264 | # } 265 | # results=sess.run([L_x,L_x_next,L_z,L_bound,KL],feed_dict) 266 | # pdb.set_trace() 267 | # resume training 268 | #saver.restore(sess, "/ltmp/e2c-plane-83000.ckpt") 269 | train_iters = 2e5 # 200K iters 270 | for i in range(int(train_iters)): 271 | (x_val, u_val, x_next_val) = dataset.sample(batch_size, replace=False) 272 | feed_dict = {x: x_val, u: u_val, x_next: x_next_val} 273 | # plt.hist(x_val[0, :]) # uncomment to inspect a sample's pixel histogram (blocks the loop) 274 | # plt.show() 275 | results = sess.run([loss, all_summaries, train_op], feed_dict) 276 | if i % 1000 == 0: 277 | print("iter=%d : Loss: %f" % (i, results[0])) 278 | if i > 2000: 279 | writer.add_summary(results[1], i) 280 | if (i % 100 == 0 and i < 1000) or (i % 1000 == 0): 281 | saver.save(sess, ckpt_file + "-%05d" % (i) + ".ckpt") 282 | 283 | sess.close() 284 | -------------------------------------------------------------------------------- /e2c/tf_e2c/e2c_seq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | multi-step prediction using E2C 5 | results are kind of poor... 6 | """ 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | import matplotlib.pyplot as plt 11 | import os 12 | from data.plane_data2 import PlaneData, get_params 13 | 14 | A=B=40 15 | 16 | x_dim,u_dim,T=get_params() 17 | z_dim=2 # latent space dimensionality 18 | eps=1e-9 # numerical stability 19 | 20 | def orthogonal_initializer(scale = 1.1): 21 | """ 22 | From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120 23 | """ 24 | def _initializer(shape, dtype=tf.float32): 25 | flat_shape = (shape[0], np.prod(shape[1:])) 26 | a = np.random.normal(0.0, 1.0, flat_shape) 27 | u, _, v = np.linalg.svd(a, full_matrices=False) 28 | # pick the one with the correct shape 29 | q = u if u.shape == flat_shape else v 30 | q = q.reshape(shape) 31 | print('Warning -- You have opted to use the orthogonal_initializer function') 32 | return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32) 33 | return _initializer 34 | 35 | class NormalDistribution(object): 36 | """ 37 | Represents a multivariate normal distribution parameterized by 38 | N(mu,Cov). If cov. matrix is diagonal, Cov=(sigma).^2. Otherwise, 39 | Cov=A*(sigma).^2*A', where A = (I+v*r^T). 
40 | """ 41 | def __init__(self,mu,sigma,logsigma,v=None,r=None): 42 | self.mu=mu 43 | self.sigma=sigma # either stdev diagonal itself, or stdev diagonal from decomposition 44 | self.logsigma=logsigma 45 | dim=mu.get_shape() 46 | if v is None: 47 | v=tf.constant(0.,shape=dim) 48 | if r is None: 49 | r=tf.constant(0.,shape=dim) 50 | self.v=v 51 | self.r=r 52 | 53 | def linear(x,output_dim): 54 | w=tf.get_variable("w", [x.get_shape()[1], output_dim], initializer=orthogonal_initializer(1.1)) 55 | b=tf.get_variable("b", [output_dim], initializer=tf.constant_initializer(0.0)) 56 | return tf.matmul(x,w)+b 57 | 58 | def ReLU(x,output_dim, scope): 59 | # helper function for implementing stacked ReLU layers 60 | with tf.variable_scope(scope): 61 | return tf.nn.relu(linear(x,output_dim)) 62 | 63 | def encode(x,share=None): 64 | with tf.variable_scope("encoder",reuse=share): 65 | for l in range(3): 66 | x=ReLU(x,150,"aggregate_loss"+str(l)) 67 | return linear(x,2*z_dim) 68 | 69 | def KLGaussian(Q,N): 70 | # Q, N are instances of NormalDistribution 71 | # implements KL Divergence term KL(N0,N1) derived in Appendix A.1 72 | # Q ~ Normal(mu,A*sigma*A^T), N ~ Normal(mu,sigma_1) 73 | # returns scalar divergence, measured in nats (information units under log rather than log2), shape= batch x 1 74 | sum=lambda x: tf.reduce_sum(x,1) # convenience fn for summing over features (columns) 75 | k=float(Q.mu.get_shape()[1].value) # dimension of distribution 76 | mu0,v,r,mu1=Q.mu,Q.v,Q.r,N.mu 77 | s02,s12=tf.square(Q.sigma),tf.square(N.sigma)+eps 78 | #vr=sum(v*r) 79 | a=sum(s02*(1.+2.*v*r)/s12) + sum(tf.square(v)/s12)*sum(tf.square(r)*s02) # trace term 80 | b=sum(tf.square(mu1-mu0)/s12) # difference-of-means term 81 | c=2.*(sum(N.logsigma-Q.logsigma) - tf.log(1.+sum(v*r))) # ratio-of-determinants term. 
82 | return 0.5*(a+b-k+c)#, a, b, c 83 | 84 | def sampleNormal(mu,sigma): 85 | # diagonal stdev 86 | n01=tf.random_normal(sigma.get_shape(), mean=0, stddev=1) 87 | return mu+sigma*n01 88 | 89 | def sampleQ_phi(h_enc,share=None): 90 | with tf.variable_scope("sampleQ_phi",reuse=share): 91 | mu,log_sigma=tf.split(1,2,linear(h_enc,z_dim*2)) # diagonal stdev values 92 | sigma=tf.exp(log_sigma) 93 | return sampleNormal(mu,sigma), NormalDistribution(mu, sigma, log_sigma) 94 | 95 | def transition(h,share=None): 96 | # compute A,B,o linearization matrices 97 | with tf.variable_scope("trans",reuse=share): 98 | for l in range(2): 99 | h=ReLU(h,100,"l"+str(l)) 100 | with tf.variable_scope("A"): 101 | v,r=tf.split(1,2,linear(h,z_dim*2)) 102 | v1=tf.expand_dims(v,-1) # (batch, z_dim, 1) 103 | rT=tf.expand_dims(r,1) # batch, 1, z_dim 104 | I=tf.diag([1.]*z_dim) 105 | A=(I+tf.batch_matmul(v1,rT)) # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted) 106 | with tf.variable_scope("B"): 107 | B=linear(h,z_dim*u_dim) 108 | B=tf.reshape(B,[-1,z_dim,u_dim]) 109 | with tf.variable_scope("o"): 110 | o=linear(h,z_dim) 111 | return A,B,o,v,r 112 | 113 | def sampleQ_psi(z,u,Q_phi,share=None): 114 | A,B,o,v,r=transition(z,share) 115 | with tf.variable_scope("sampleQ_psi"): 116 | mu_t=tf.expand_dims(Q_phi.mu,-1) # batch,z_dim,1 117 | Amu=tf.squeeze(tf.batch_matmul(A,mu_t), [-1]) 118 | u=tf.expand_dims(u,-1) # batch,u_dim,1 119 | Bu=tf.squeeze(tf.batch_matmul(B,u),[-1]) 120 | Q_psi=NormalDistribution(Amu+Bu+o,Q_phi.sigma,Q_phi.logsigma, v, r) 121 | # the actual z_next sample is generated by deterministically transforming z_t 122 | z=tf.expand_dims(z,-1) 123 | Az=tf.squeeze(tf.batch_matmul(A,z),[-1]) 124 | z_next=Az+Bu+o 125 | return z_next,Q_psi#,(A,B,o,v,r) # debugging 126 | 127 | def decode(z,share=None): 128 | with tf.variable_scope("decoder",reuse=share): 129 | for l in range(2): 130 | z=ReLU(z,200,"l"+str(l)) 131 | return linear(z,x_dim) 132 | 133 | def binary_crossentropy(t,o): 134 | return t*tf.log(o+eps) + (1.0-t)*tf.log(1.0-o+eps) 135 | 136 | def recons_loss(x,x_recons): 137 | with tf.variable_scope("Lx"): 138 | ll=tf.reduce_sum(binary_crossentropy(x,x_recons),1) # sum across features 139 | return -ll # negative log-likelihood 140 | 141 | def latent_loss(Q): 142 | with tf.variable_scope("Lz"): 143 | mu2=tf.square(Q.mu) 144 | sigma2=tf.square(Q.sigma) 145 | # KL(Q || N(0,I)) between the posterior and the unit Gaussian prior 146 | return -0.5*tf.reduce_sum(1+2*Q.logsigma-mu2-sigma2,1) 147 | 148 | def sampleP_theta(h_dec,share=None): 149 | # sample x from bernoulli distribution with means p=W(h_dec) 150 | with tf.variable_scope("P_theta",reuse=share): 151 | p=linear(h_dec,x_dim) 152 | return tf.sigmoid(p) # mean of bernoulli distribution 153 | 154 | # BUILD NETWORK 155 | batch_size=64 156 | 157 | 158 | # modified 159 | T=10 # sequence len 160 | 161 | # inputs 162 | xs=[tf.placeholder(tf.float32,[batch_size, x_dim]) for _ in range(T)] # one placeholder per step; [placeholder]*T would alias a single tensor 163 | us=[tf.placeholder(tf.float32, [batch_size, u_dim]) for _ in range(T-1)] 164 | 165 | x_recons=[0]*(T-1) # we'll apply VAE recons to the last one in the sequence as well 166 | x_predicts=[0]*(T-1) 167 | 168 | # T-1 recons and prediction losses 169 | losses=[0]*(T-1) 170 | kl_losses=[0]*(T-1) 171 | lbound_losses=[0]*(T-1) 172 | print(T) 173 | # arrays for storing losses 174 | DO_SHARE=None 175 | for t in range(T-1): 176 | if t==0: 177 | x=xs[0] # we are given the first one 178 | else: 179 | x=x_predicts[t-1] # re-use our hallucinations to predict 180 | #x=xs[t] 181 | 
h_enc=encode(x,share=DO_SHARE) 182 | z,Q_phi=sampleQ_phi(h_enc,share=DO_SHARE) 183 | # reconstitute x_t 184 | h_dec=decode(z,share=DO_SHARE) 185 | x_recons[t]=sampleP_theta(h_dec,share=DO_SHARE) 186 | # linearized dynamics, predict 187 | # compute linearized dynamics, predict new latent state 188 | z_predict,Q_psi=sampleQ_psi(z,us[t],Q_phi,share=DO_SHARE) 189 | DO_SHARE=True # share from now on 190 | h_dec_predict=decode(z_predict,share=DO_SHARE) 191 | x_predicts[t]=sampleP_theta(h_dec_predict,share=DO_SHARE) 192 | # encode next true data 193 | h_enc_next=encode(xs[t+1],share=DO_SHARE) 194 | z_next,Q_phi_next=sampleQ_phi(h_enc_next,share=DO_SHARE) 195 | 196 | # add loss terms 197 | L_x=recons_loss(xs[t],x_recons[t]) 198 | L_x_next=recons_loss(xs[t+1],x_predicts[t]) 199 | L_z=latent_loss(Q_phi) 200 | L_bound=L_x+L_x_next+L_z 201 | KL=KLGaussian(Q_psi,Q_phi_next) 202 | lambd=1.0 203 | 204 | lbound_losses[t]=tf.reduce_mean(L_bound) 205 | kl_losses[t]=tf.reduce_mean(lambd*KL) 206 | losses[t]=tf.reduce_mean(L_bound+lambd*KL) # average loss over minibatch to single scalar 207 | 208 | 209 | loss=tf.add_n(losses) # sum all the losses to compute the sequence loss 210 | 211 | for v in tf.all_variables(): 212 | print("%s : %s" % (v.name, v.get_shape())) 213 | 214 | with tf.variable_scope("Optimizer"): 215 | learning_rate=1e-4 216 | optimizer=tf.train.AdamOptimizer(learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1 217 | train_op=optimizer.minimize(loss) 218 | 219 | saver = tf.train.Saver(max_to_keep=100) # keep up to 100 checkpoint files 220 | 221 | ckpt_file="/ltmp/e2c-plane" 222 | 223 | # summaries 224 | tf.scalar_summary("loss", loss) 225 | 226 | # per-step loss summaries, in case late-step KL terms (e.g. Lz_8) behave very differently from Lz_0 227 | #for t in range(T-1): 228 | # tf.scalar_summary("Lb_%d" % (t), lbound_losses[t]) 229 | # tf.scalar_summary("Lz_%d" % (t), kl_losses[t]) 230 | # tf.scalar_summary("loss_%d" % (t), losses[t]) 231 | 232 | # tf.scalar_summary("L_x", tf.reduce_mean(L_x)) 233 | # tf.scalar_summary("L_x_next", tf.reduce_mean(L_x_next)) 234 | # tf.scalar_summary("L_z", tf.reduce_mean(L_z)) 235 | # tf.scalar_summary("KL",tf.reduce_mean(KL)) 236 | all_summaries = tf.merge_all_summaries() 237 | 238 | # TRAIN 239 | if __name__=="__main__": 240 | init=tf.initialize_all_variables() 241 | sess=tf.InteractiveSession() 242 | sess.run(init) 243 | # WRITER 244 | writer = tf.train.SummaryWriter("/ltmp/e2c", sess.graph_def) 245 | 246 | dataset=PlaneData("data/plane1.npz","data/env1.png") 247 | dataset.initialize() 248 | 249 | # resume training 250 | saver.restore(sess, "/ltmp/e2c-plane-single1.ckpt") 251 | 252 | # test to make sure samples are actually trajectories 253 | 254 | def getimgs(x): 255 | padsize=1 256 | padval=.5 257 | ph=B+2*padsize 258 | pw=A+2*padsize 259 | img=np.ones((ph,len(x)*pw))*padval 260 | for t in range(len(x)): 261 | startc=t*pw+padsize 262 | img[padsize:padsize+B, startc:startc+A]=x[t][20,:].reshape((A,B)) 263 | return img 264 | (x_vals,u_vals)=dataset.sample_seq(batch_size,T) 265 | plt.matshow(getimgs(x_vals),cmap=plt.cm.gray,vmin=0,vmax=1) 266 | plt.show() 267 | 268 | train_iters=2e5 # 200K iters 269 | for i in range(int(train_iters)): 270 | (x_vals,u_vals)=dataset.sample_seq(batch_size,T,replace=False) 271 | feed_dict={} 272 | for t in range(T): 273 | feed_dict[xs[t]] = x_vals[t] 274 | for t in range(T-1): 275 | feed_dict[us[t]] = u_vals[t] 276 | 277 | results=sess.run([loss,all_summaries,train_op],feed_dict) 278 | if i%1000==0: 279 | print("iter=%d : Loss: %f" % (i,results[0])) 280 | if 
i>2000: 281 | writer.add_summary(results[1], i) 282 | if (i%100==0 and i < 1000) or (i % 1000 == 0): 283 | saver.save(sess,ckpt_file+"-%05d"%(i)+".ckpt") 284 | 285 | # save variables 286 | #print("Model saved in file: %s" % saver.save(sess,ckpt_file+".ckpt")) 287 | 288 | sess.close() 289 | -------------------------------------------------------------------------------- /e2c/tf_e2c/env0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/env0.png -------------------------------------------------------------------------------- /e2c/tf_e2c/env1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/env1.png -------------------------------------------------------------------------------- /e2c/tf_e2c/env_blank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/env_blank.png -------------------------------------------------------------------------------- /e2c/tf_e2c/plane_data2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | 5 | import numpy as np 6 | from numpy.random import randint 7 | import os 8 | from .dataset import DataSet 9 | 10 | num_t = 80 # number of trajectories (i.e. number of initial states) 11 | T = 1000 # length of each trajectory sequence 12 | u_dim = 2 # control (action) dimension 13 | w, h = 40, 40 14 | x_dim = w * h 15 | rw = 1 # robot half-width 16 | 17 | 18 | def get_params(): 19 | return x_dim, u_dim, T 20 | 21 | 22 | class PlaneData(DataSet): 23 | def __init__(self, fname, env_file): 24 | super(PlaneData, self).__init__() 25 | self.cache = fname 26 | self.initialized = False 27 | self.im = plt.imread(os.path.join(os.path.dirname(__file__), env_file)) # grayscale 28 | self.params = (x_dim, u_dim, T) 29 | 30 | def is_colliding(self, p): 31 | if np.any([p - rw < 0, p + rw >= w]): 32 | return True 33 | # check robot body overlap with obstacle field 34 | return np.mean( 35 | self.im[p[0] - rw:p[0] + rw + 1, p[1] - rw:p[1] + rw + 1]) > 0.05 36 | 37 | def compute_traj(self, max_dist=1): 38 | # computes P,U data for single trajectory 39 | # all P,U share the same environment obstacles.png 40 | P = np.zeros((T, 2), dtype=np.int) # r,c position 41 | U = np.zeros((T, u_dim), dtype=np.int) 42 | P[0, :] = [rw, randint(rw, w - rw)] # initial location 43 | for t in range(1, T): 44 | p = np.copy(P[t - 1, :]) 45 | # dr direction 46 | d = randint(-1, 2) # direction 47 | nsteps = randint(max_dist + 1) 48 | dr = d * nsteps # applied control 49 | for i in range(nsteps): 50 | p[0] += d 51 | if self.is_colliding(p): 52 | p[0] -= d 53 | break 54 | # dc direction 55 | d = randint(-1, 2) # direction 56 | nsteps = randint(max_dist + 1) 57 | dc = d * nsteps # applied control 58 | for i in range(nsteps): 59 | p[1] += d 60 | if self.is_colliding(p): 61 | p[1] -= d # step back 62 | break 63 | P[t, :] = p 64 | U[t, :] = [dr, dc] 65 | return P, U 66 | 67 | def initialize(self): 68 | if os.path.exists(self.cache): 69 | self.load() 70 | else: 71 | self.precompute() 72 | self.initialized = True 73 | 74 | def compute_data(self): 75 | # compute multiple trajectories 76 | P = 
np.zeros((num_t, T, 2), dtype=np.int) 77 | U = np.zeros((num_t, T, u_dim), dtype=np.int) 78 | for i in range(num_t): 79 | P[i, :, :], U[i, :, :] = self.compute_traj(max_dist=2) 80 | return P, U 81 | 82 | def precompute(self): 83 | print("Precomputing P,U...") 84 | self.P, self.U = self.compute_data() 85 | 86 | def save(self): 87 | print("Saving P,U...") 88 | np.savez(self.cache, P=self.P, U=self.U) 89 | 90 | def load(self): 91 | print("Loading P,U from %s..." % (self.cache)) 92 | D = np.load(self.cache) 93 | self.P, self.U = D['P'], D['U'] 94 | 95 | def getXp(self, p): 96 | # return image X given true state p (position) of robot 97 | x = np.copy(self.im) 98 | x[p[0] - rw:p[0] + rw + 1, p[1] - rw: 99 | p[1] + rw + 1] = 1. # robot is white on black background 100 | return x.flatten() # 1-D copy of the rendered frame 101 | 102 | def getX(self, i, t): 103 | # i=trajectory index, t=time step 104 | return self.getXp(self.P[i, t, :]) 105 | 106 | def getXTraj(self, i): 107 | # i=traj index 108 | X = np.zeros((T, x_dim), dtype=np.float) 109 | for t in range(T): 110 | X[t, :] = self.getX(i, t) 111 | return X 112 | 113 | def sample(self, batch_size, replace=True): # replace is accepted for compatibility with the training scripts; (traj, t) indices are drawn independently either way 114 | """ 115 | computes (x_t,u_t,x_{t+1}) pair 116 | returns tuple of 3 ndarrays with shape 117 | (batch,x_dim), (batch, u_dim), (batch, x_dim) 118 | """ 119 | if not self.initialized: 120 | raise ValueError( 121 | "Dataset not loaded - call PlaneData.initialize() first.") 122 | traj = randint(0, num_t, size=batch_size) # which trajectory 123 | tt = randint(0, T - 1, size=batch_size) # time step t for each batch 124 | X0 = np.zeros((batch_size, x_dim)) 125 | U0 = np.zeros((batch_size, u_dim), dtype=np.int) 126 | X1 = np.zeros((batch_size, x_dim)) 127 | for i in range(batch_size): 128 | t = tt[i] 129 | p = self.P[traj[i], t, :] 130 | X0[i, :] = self.getX(traj[i], t) 131 | X1[i, :] = self.getX(traj[i], t + 1) 132 | U0[i, :] = self.U[traj[i], t, :] 133 | return (X0, U0, X1) 134 | 135 | def getPSpace(self): 136 | """ 137 | Returns all possible positions of agent 138 | """ 139 | ww = h - 2 * rw 140 | P = np.zeros((ww * ww, 2)) # max possible positions 141 | i = 0 142 | p = np.array([rw, rw]) # initial location 143 | for dr in range(ww): 144 | for dc in range(ww): 145 | if not self.is_colliding(p + np.array([dr, dc])): 146 | P[i, :] = p + np.array([dr, dc]) 147 | i += 1 148 | return P[:i, :] 149 | 150 | def getXPs(self, Ps): 151 | X = np.zeros((Ps.shape[0], x_dim)) 152 | for i in range(Ps.shape[0]): 153 | X[i, :] = self.getXp(Ps[i, :]) 154 | return X 155 | 156 | 157 | if __name__ == "__main__": 158 | import matplotlib.animation as animation 159 | p = PlaneData("plane2.npz", "env1.png") 160 | p.initialize() 161 | p.save() 162 | im = p.im 163 | A, B = im.shape 164 | 165 | # show sample tuples 166 | if True: 167 | fig, aa = plt.subplots(1, 2) 168 | x0, u0, x1 = p.sample(2) 169 | m1 = aa[0].matshow( 170 | x0[0, :].reshape(w, w), cmap=plt.cm.gray, vmin=0., vmax=1.) 171 | aa[0].set_title('x(t)') 172 | m2 = aa[1].matshow( 173 | x1[0, :].reshape(w, w), cmap=plt.cm.gray, vmin=0., vmax=1.) 
174 | aa[1].set_title('x(t+1), u=(%d,%d)' % (u0[0, 0], u0[0, 1])) 175 | fig.tight_layout() 176 | 177 | def updatemat2(t): 178 | x0, u0, x1 = p.sample(2) 179 | m1.set_data(x0[0, :].reshape(w, w)) 180 | m2.set_data(x1[0, :].reshape(w, w)) 181 | return m1, m2 182 | 183 | anim = animation.FuncAnimation( 184 | fig, updatemat2, frames=100, interval=1000, blit=True, repeat=True) 185 | 186 | Writer = animation.writers['imagemagick'] # animation.writers.avail 187 | writer = Writer(fps=1, metadata=dict(artist='Me'), bitrate=1800) 188 | anim.save('sample_obs.gif', writer=writer) 189 | 190 | #show trajectory 191 | if True: 192 | fig, ax = plt.subplots() 193 | X = p.getXTraj(0) 194 | mat = ax.matshow( 195 | X[0, :].reshape((A, B)), cmap=plt.cm.gray, vmin=0., vmax=1.) 196 | 197 | def updatemat(t): 198 | mat.set_data(X[t, :].reshape((A, B))) 199 | return mat, 200 | 201 | anim = animation.FuncAnimation( 202 | fig, updatemat, frames=T - 1, interval=30, blit=True, repeat=True) 203 | plt.show() 204 | -------------------------------------------------------------------------------- /e2c/tf_e2c/tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | test functions like KLGaussian to make sure you implemented correctly 3 | """ 4 | 5 | import numpy as np 6 | 7 | # ground truth implementation 8 | from divergence import gau_kl 9 | 10 | pm = np.array([1., 1., 1.], dtype=np.float32) # true 11 | pv = np.array([0.1, 0.3, 0.5], dtype=np.float32) # diagonal covariance 12 | qm = np.array([0., 0., 0.], dtype=np.float32) 13 | qv = np.array([1., 1., 1.], dtype=np.float32) 14 | 15 | KL, a, b, c = gau_kl(pm, pv, qm, qv) # assumes diagonal covariances... 16 | print('KL : %f' % (KL)) 17 | print('trace term : %f' % (a)) 18 | print('difference of means : %f' % (b)) 19 | print('ratio of determinants : %f' % (c)) 20 | 21 | # my implementation 22 | 23 | import tensorflow as tf 24 | from e2c import NormalDistribution, KLGaussian 25 | batch_size = 1 26 | z_dim = 3 27 | 28 | I = tf.identity(np.tile(np.eye(z_dim, dtype=np.float32), 29 | [batch_size, 1, 30 | 1])) # identity matrix (batch_size, z_dim, z_dim) 31 | zero_z = tf.constant(0., shape=[batch_size, z_dim]) 32 | 33 | pm = pm.reshape((batch_size, z_dim)) 34 | pv = pv.reshape((batch_size, z_dim)) 35 | qm = qm.reshape((batch_size, z_dim)) 36 | qv = qv.reshape((batch_size, z_dim)) 37 | 38 | pmu = tf.constant(pm, shape=[batch_size, z_dim]) 39 | psigma = tf.constant(np.sqrt(pv)) 40 | P = NormalDistribution(pmu, psigma, tf.log(psigma), zero_z, zero_z) 41 | 42 | qmu = tf.constant(qm, shape=[batch_size, z_dim]) 43 | qsigma = tf.constant(np.sqrt(qv)) 44 | Q = NormalDistribution(qmu, qsigma, tf.log(qsigma), zero_z, zero_z) 45 | 46 | # sigma0=tf.constant([0.1,0.3,0.5]) 47 | # P=NormalDistribution(one_z, sigma0, tf.log(sigma0), zero_z, zero_z) 48 | # Pz=NormalDistribution(zero_z, one_z, zero_z, zero_z, zero_z)# prior on Q_phi = mean 0, unit variance => logsigma=0 49 | 50 | KL, a, b, c = KLGaussian(P, Q, "tmp") 51 | 52 | sess = tf.InteractiveSession() 53 | 54 | results = sess.run([KL, a, b, c]) 55 | print('KL : %f' % (results[0])) 56 | print('trace term : %f' % (results[1])) 57 | print('difference of means : %f' % (results[2])) 58 | print('ratio of determinants : %f' % (results[3])) 59 | 60 | sess.close() 61 | -------------------------------------------------------------------------------- /e2c/tf_e2c/vae.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | import numpy as np 6 | import 
tensorflow as tf 7 | import matplotlib.pyplot as plt 8 | import os 9 | from tensorflow.examples.tutorials import mnist 10 | 11 | A = B = 40 12 | x_dim = A * B 13 | z_dim = 2 14 | 15 | eps = 1e-9 # numerical stability 16 | 17 | 18 | def orthogonal_initializer(scale=1.1): 19 | ''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120 20 | ''' 21 | print( 22 | 'Warning -- You have opted to use the orthogonal_initializer function') 23 | 24 | def _initializer(shape, dtype=tf.float32): 25 | flat_shape = (shape[0], np.prod(shape[1:])) 26 | a = np.random.normal(0.0, 1.0, flat_shape) 27 | u, _, v = np.linalg.svd(a, full_matrices=False) 28 | # pick the one with the correct shape 29 | q = u if u.shape == flat_shape else v 30 | q = q.reshape(shape) # tf.constant below casts the result to float32 31 | print('you have initialized one orthogonal matrix.') 32 | return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32) 33 | 34 | return _initializer 35 | 36 | 37 | class NormalDistribution(object): 38 | """Diagonal Gaussian N(mu, diag(sigma^2)), stored as (mu, sigma, logsigma)""" 39 | 40 | def __init__(self, mu, sigma, logsigma): 41 | super(NormalDistribution, self).__init__() 42 | self.mu = mu 43 | self.sigma = sigma 44 | self.logsigma = logsigma 45 | 46 | 47 | def linear(x, output_dim): 48 | #w=tf.get_variable("w", [x.get_shape()[1], output_dim], initializer=tf.random_normal_initializer(mean=0.0, stddev=.01)) 49 | w = tf.get_variable( 50 | "w", [x.get_shape()[1], output_dim], 51 | initializer=orthogonal_initializer(1.1)) 52 | b = tf.get_variable( 53 | "b", [output_dim], initializer=tf.constant_initializer(0.0)) 54 | return tf.matmul(x, w) + b 55 | 56 | 57 | def ReLU(x, output_dim, scope): 58 | with tf.variable_scope(scope): 59 | return tf.nn.relu(linear(x, output_dim)) 60 | 61 | 62 | def encode(x): 63 | with tf.variable_scope("encoder"): 64 | for l in range(3): 65 | x = ReLU(x, 150, "l" + str(l)) 66 | return linear(x, 2 * z_dim) # mu and log_sigma, z_dim values each 67 | #return tf.nn.relu(linear(x,z_dim)) 68 | 69 | 70 | def sampleNormal(mu, sigma): 71 | # note: sigma is diagonal standard deviation, not variance 72 | n01 = tf.random_normal(mu.get_shape(), mean=0, stddev=1) 73 | return mu + sigma * n01 74 | 75 | 76 | def sampleQ(h_enc): 77 | """ 78 | Samples Zt ~ normrnd(mu,sigma) via reparameterization trick for normal dist 79 | mu is (batch,z_size) 80 | 81 | """ 82 | with tf.variable_scope("sampleQ"): 83 | with tf.variable_scope("Q"): 84 | mu, log_sigma = tf.split(1, 2, linear(h_enc, z_dim * 2)) 85 | sigma = tf.exp(log_sigma) # sigma_t, diagonal stdev of Q_phi 86 | return sampleNormal(mu, sigma), NormalDistribution( 87 | mu, sigma, log_sigma) 88 | 89 | 90 | def decode(z): 91 | # with tf.variable_scope("decoder"): 92 | # return tf.nn.relu(linear(z,x_dim)) 93 | with tf.variable_scope("decoder"): 94 | for l in range(2): 95 | z = ReLU(z, 200, "l" + str(l)) 96 | return linear(z, x_dim) 97 | 98 | 99 | def binary_crossentropy(t, o): 100 | return -(t * tf.log(o + eps) + (1.0 - t) * tf.log(1.0 - o + eps)) 101 | 102 | 103 | def recons_loss(x, x_recons): 104 | with tf.variable_scope("Lx"): 105 | return tf.reduce_sum(binary_crossentropy(x, x_recons), 106 | 1) # sum across features 107 | 108 | 109 | def latent_loss(Q): 110 | # KL divergence between the latent-space posterior and a prior 111 | # (regularizer) 112 | with tf.variable_scope("Lz"): 113 | mu2 = tf.square(Q.mu) 114 | sigma2 = tf.square(Q.sigma) 115 | #return 0.5*tf.reduce_sum(1.+mu2+sigma2-2.*Q.logsigma,1) # sum across features 116 | # KL(Q || N(0,I)) for a diagonal Gaussian 117 | 
return -0.5 * tf.reduce_sum(1 + 2 * Q.logsigma - mu2 - sigma2, 1) 118 | 119 | 120 | def sampleP_theta(h_dec): 121 | # sample x from bernoulli distribution with means p=W(h_dec) 122 | with tf.variable_scope("P_theta"): 123 | p = linear(h_dec, x_dim) 124 | return tf.sigmoid(p) # mean of bernoulli distribution 125 | 126 | 127 | # BUILD NETWORK 128 | batch_size = 64 129 | x = tf.placeholder(tf.float32, [batch_size, x_dim]) 130 | h_enc = encode(x) # encoded space 131 | z, Q = sampleQ(h_enc) # z - latent space 132 | #h_dec=decode(h_enc) # regular autoencoder 133 | h_dec = decode(z) # decoded space 134 | x_recons = sampleP_theta(h_dec) # original space 135 | 136 | with tf.variable_scope("Loss"): 137 | L_x = recons_loss(x, x_recons) 138 | L_z = latent_loss(Q) 139 | loss = tf.reduce_mean(L_x) 140 | #loss=tf.reduce_mean(L_x+L_z) # average over minibatch -> single scalar 141 | 142 | with tf.variable_scope("Optimizer"): 143 | learning_rate = 1e-4 144 | optimizer = tf.train.AdamOptimizer( 145 | learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1 146 | train_op = optimizer.minimize(loss) 147 | 148 | saver = tf.train.Saver() # saves variables learned during training 149 | 150 | # summaries 151 | tf.scalar_summary("loss", loss) 152 | tf.scalar_summary("L_x", tf.reduce_mean(L_x)) 153 | tf.scalar_summary("L_z", tf.reduce_mean(L_z)) 154 | all_summaries = tf.merge_all_summaries() 155 | 156 | # TRAIN 157 | init = tf.initialize_all_variables() 158 | sess = tf.InteractiveSession() 159 | sess.run(init) 160 | # WRITER 161 | writer = tf.train.SummaryWriter("/ltmp/vae", sess.graph_def) 162 | 163 | # PLANE TASK 164 | ckpt_file = "vaemodel_plane.ckpt" 165 | from plane_data2 import PlaneData 166 | dataset = PlaneData("plane.npz", "env0.png") 167 | dataset.initialize() 168 | 169 | # resume training 170 | #saver.restore(sess, ckpt_file) 171 | 172 | # # TRAIN 173 | if True: 174 | train_iters = 50000 175 | for i in range(int(train_iters)): 176 | (x_val, u_val, x_next_val) = dataset.sample(batch_size) 177 | #x_val=dataset.sample(batch_size) 178 | feed_dict = {x: x_val} 179 | results = sess.run([loss, all_summaries, train_op], feed_dict) 180 | writer.add_summary(results[1], i) # write summary data to disk 181 | if i % 1000 == 0: 182 | print("iter=%d : Loss: %f" % (i, results[0])) 183 | # save variables 184 | print("Model saved in file: %s" % saver.save(sess, ckpt_file)) 185 | 186 | if True: 187 | saver.restore(sess, ckpt_file) 188 | (x_val, u_val, x_next_val) = dataset.sample(batch_size) 189 | #x_val=dataset.sample(batch_size) 190 | xr = sess.run(x_recons, {x: x_val}) 191 | fig, arr = plt.subplots(10, 2) 192 | for i in range(10): 193 | arr[i, 0].matshow( 194 | x_val[i, :].reshape((A, B)), cmap=plt.cm.gray, vmin=0, vmax=1) 195 | arr[i, 1].matshow( 196 | xr[i, :].reshape((A, B)), cmap=plt.cm.gray, vmin=0, vmax=1) 197 | plt.show() 198 | 199 | sess.close() 200 | -------------------------------------------------------------------------------- /e2c/tf_e2c/viz_results.py: -------------------------------------------------------------------------------- 1 | """ 2 | Quick-and-dirty visualization scripts for a variety of tasks. 
3 | """ 4 | 5 | import sequential_e2c as e2c 6 | #import e2c_plane_z as e2c 7 | 8 | import matplotlib.pyplot as plt 9 | import tensorflow as tf 10 | import numpy as np 11 | from data.plane_data2 import PlaneData, get_params 12 | 13 | import ipdb as pdb 14 | 15 | 16 | def show_recons_samples(sess, ckptfile): 17 | # visualize sample reconstructions 18 | e2c.saver.restore(sess, ckptfile) # restore variable values 19 | dataset = PlaneData("data/plane1.npz", "data/env1.png") 20 | dataset.initialize() 21 | (x_val, u_val, x_next_val) = dataset.sample(e2c.batch_size) 22 | xr, xp = sess.run( 23 | [e2c.x_recons, e2c.x_predict], 24 | feed_dict={e2c.x: x_val, 25 | e2c.u: u_val, 26 | e2c.x_next: x_next_val}) 27 | #xr,xp=sess.run([e2c.x_recons0, e2c.x_predict0],feed_dict={e2c.x0:x_val,e2c.u0:u_val,e2c.x1:x_next_val}) 28 | A, B = e2c.A, e2c.B 29 | 30 | def getimgs(x, xnext): 31 | padsize = 1 32 | padval = .5 33 | ph = B + 2 * padsize 34 | pw = A + 2 * padsize 35 | img = np.ones((10 * ph, 2 * pw)) * padval 36 | for i in range(10): 37 | startr = i * ph + padsize 38 | img[startr:startr + B, padsize:padsize + A] = x[i, :].reshape((A, 39 | B)) 40 | for i in range(10): 41 | startr = i * ph + padsize 42 | img[startr:startr + B, pw + padsize:pw + padsize + A] = xnext[ 43 | i, :].reshape((A, B)) 44 | return img 45 | 46 | fig, arr = plt.subplots(1, 2) 47 | arr[0].matshow( 48 | getimgs(x_val, x_next_val), cmap=plt.cm.gray, vmin=0, vmax=1) 49 | arr[0].set_title('Data') 50 | arr[1].matshow(getimgs(xr, xp), cmap=plt.cm.gray, vmin=0, vmax=1) 51 | arr[1].set_title('Reconstruction') 52 | plt.show() 53 | 54 | 55 | def show_recons_seq(sess, ckptfile): 56 | e2c.saver.restore(sess, ckptfile) # restore variable values 57 | dataset = PlaneData("data/plane1.npz", "data/env1.png") 58 | dataset.initialize() 59 | T = e2c.T 60 | print(T) 61 | (x_vals, u_vals) = dataset.sample_seq(e2c.batch_size, T) 62 | feed_dict = {} 63 | for t in range(T): 64 | feed_dict[e2c.xs[t]] = x_vals[t] 65 | for t in range(T - 1): 66 | feed_dict[e2c.us[t]] = u_vals[t] 67 | 68 | fetches = e2c.x_recons + e2c.x_predicts 69 | results = sess.run(fetches, feed_dict) 70 | xr = results[:T - 1] 71 | xp = results[T - 1:] 72 | A, B = e2c.A, e2c.B 73 | 74 | def getimgs(x): 75 | padsize = 1 76 | padval = .5 77 | ph = B + 2 * padsize 78 | pw = A + 2 * padsize 79 | img = np.ones((ph, len(x) * pw)) * padval 80 | for t in range(len(x)): 81 | startc = t * pw + padsize 82 | img[padsize:padsize + B, startc:startc + A] = x[t][1, :].reshape( 83 | (A, B)) 84 | return img 85 | 86 | fig, arr = plt.subplots(3, 1) 87 | arr[0].matshow(getimgs(x_vals), cmap=plt.cm.gray, vmin=0, vmax=1) 88 | arr[0].set_title('X') 89 | arr[1].matshow(getimgs(xr), cmap=plt.cm.gray, vmin=0, vmax=1) 90 | arr[1].set_title('Reconstruction') 91 | arr[2].matshow(getimgs(xp), cmap=plt.cm.gray, vmin=0, vmax=1) 92 | arr[2].set_title('Prediction') 93 | plt.show() 94 | 95 | 96 | def viz_z(sess, ckptfile): 97 | e2c.saver.restore(sess, ckptfile) # restore variable values 98 | dataset = PlaneData("data/plane1.npz", "data/env1.png") 99 | Ps, NPs = dataset.getPSpace() 100 | batch_size = e2c.batch_size 101 | n0 = NPs.shape[0] 102 | if False: 103 | Ps = np.vstack((Ps, NPs)) 104 | xy = np.zeros([Ps.shape[0], 2]) 105 | xy[:, 0] = Ps[:, 1] 106 | xy[:, 107 | 1] = 20 - Ps[:, 108 | 0] # for the purpose of computing theta, map centered @ origin 109 | Zs = np.zeros([Ps.shape[0], e2c.z_dim]) 110 | 111 | theta = np.arctan(xy[:, 1] / xy[:, 0]) 112 | for i in range(Ps.shape[0] // batch_size): 113 | print("batch %d" % i) 114 | x_val = 
dataset.getXPs(Ps[i * batch_size:(i + 1) * batch_size, :]) 115 | Zs[i * batch_size:(i + 1) * batch_size, :] = sess.run( 116 | e2c.z, {e2c.x: x_val}) 117 | # last remaining points may not fit precisely into 1 minibatch. 118 | x_val = dataset.getXPs(Ps[-batch_size:, :]) 119 | Zs[-batch_size:, :] = sess.run(e2c.z, {e2c.x: x_val}) 120 | 121 | if False: 122 | theta[-n0:] = 1 123 | 124 | fig, arr = plt.subplots(1, 2) 125 | arr[0].scatter(Ps[:, 1], 40 - Ps[:, 0], c=(np.pi + theta) / (2 * np.pi)) 126 | arr[0].set_title('True State Space') 127 | arr[1].scatter(Zs[:, 0], Zs[:, 1], c=(np.pi + theta) / (2 * np.pi)) 128 | arr[1].set_title('Latent Space Z') 129 | #plt.show() 130 | return fig 131 | 132 | 133 | def viz_z_unfold(sess, cpktprefix): 134 | d = 100 # interval 135 | for i in range(int(1e5) // d): 136 | f = "%s-%05d" % (cpktprefix, i * d) 137 | ckptfile = f + ".ckpt" 138 | print(ckptfile) 139 | fig = viz_z(sess, ckptfile) 140 | fig.suptitle('%d' % (i * d)) 141 | fig.savefig(f + ".png") 142 | # combine with convert -delay 10 -loop 0 e2c-plane-*.png out.gif 143 | # then reduce size using gifsicle -O3 --colors 256 < out.gif > new.gif 144 | print('done!') 145 | 146 | 147 | if __name__ == "__main__": 148 | sess = tf.InteractiveSession() 149 | #viz_z_unfold(sess, "/ltmp/e2c-plane") 150 | fig = viz_z(sess, "/ltmp/e2c-plane-199000.ckpt") 151 | #show_recons_samples(sess,"/ltmp/e2c-plane-186000.ckpt") 152 | show_recons_seq(sess, "/ltmp/e2c-plane-199000.ckpt") 153 | plt.show() 154 | sess.close() 155 | -------------------------------------------------------------------------------- /e2c/vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch import nn 4 | from .configs import load_config 5 | from .losses import binary_crossentropy 6 | 7 | 8 | class VAE(torch.nn.Module): 9 | def __init__(self, dim_in, dim_z, config='pendulum'): 10 | super(VAE, self).__init__() 11 | enc, trans, dec = load_config(config) 12 | self.encoder = enc(dim_in, dim_z) 13 | self.decoder = dec(dim_z, dim_in) 14 | 15 | def reparam(self, mean, logvar): 16 | std = logvar.mul(0.5).exp_() 17 | self.z_mean = mean 18 | self.z_sigma = std 19 | eps = torch.FloatTensor(std.size()).normal_() 20 | if std.data.is_cuda: 21 | eps = eps.cuda() # .cuda() returns a copy; the result must be kept 22 | eps = Variable(eps) 23 | return eps.mul(std).add_(mean) 24 | 25 | def forward(self, x): 26 | self.z_mean, logvar = self.encoder(x) 27 | self.z_logsigma = logvar.mul(0.5) 28 | 29 | z = self.reparam(self.z_mean, logvar) 30 | x_dec = self.decoder(z) 31 | 32 | return x_dec 33 | 34 | def latent_embeddings(self, x): 35 | return self.encoder(x)[0] 36 | 37 | 38 | def compute_loss(x_pred, x_true, z_mean, z_logsigma, mse=False): 39 | # see Appendix B from VAE paper: 40 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 41 | # https://arxiv.org/abs/1312.6114 42 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 43 | if mse: 44 | x_reconst_loss = (x_pred - x_true).pow(2).sum(dim=1) 45 | else: 46 | x_reconst_loss = -binary_crossentropy(x_true, x_pred).sum(dim=1) 47 | logvar = z_logsigma.mul(2) 48 | KLD_element = z_mean.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar) 49 | KLD = torch.sum(KLD_element, dim=1).mul(-0.5) 50 | return x_reconst_loss.mean(), KLD.mean() --------------------------------------------------------------------------------
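For orientation, a minimal sketch of how the VAE and compute_loss defined in e2c/vae.py fit together in one training step. The sizes, learning rate, stand-in random batch, and the Adam choice are illustrative assumptions rather than values from this repository; it also assumes e2c.losses.binary_crossentropy follows the usual log-likelihood sign convention, so recon + kld is the negative ELBO.

import torch
from torch.autograd import Variable
from e2c.vae import VAE, compute_loss

dim_in, dim_z = 48 * 48, 3                     # illustrative sizes (hypothetical)
model = VAE(dim_in, dim_z, config='pendulum')  # 'pendulum' is this module's default config
opt = torch.optim.Adam(model.parameters(), lr=1e-4)

x = Variable(torch.rand(128, dim_in))          # stand-in batch of flattened frames in [0, 1]
x_dec = model(x)                               # forward() caches model.z_mean / model.z_logsigma
recon, kld = compute_loss(x_dec, x, model.z_mean, model.z_logsigma)
loss = recon + kld                             # reconstruction term + KL regularizer

opt.zero_grad()
loss.backward()
opt.step()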