├── LICENSE ├── README.md ├── data └── sample_pendulum_data.py └── e2c ├── __init__.py ├── ae.py ├── configs.py ├── datasets.py ├── e2c.py ├── losses.py ├── tf_e2c ├── LICENSE ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── dataset.cpython-35.pyc │ ├── dataset.cpython-36.pyc │ ├── plane_data2.cpython-35.pyc │ └── plane_data2.cpython-36.pyc ├── dataset.py ├── e2c_plane.py ├── e2c_seq.py ├── env0.png ├── env1.png ├── env_blank.png ├── plane_data2.py ├── tests.py ├── vae.py └── viz_results.py └── vae.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Yicheng LUO 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Embed to Control implementation in PyTorch 2 | 3 | Paper can be found here: <http://arxiv.org/abs/1506.07365> 4 | 5 | You will need a patched version of OpenAI Gym in order to generate the 6 | dataset. See 7 | 8 | For the planar task, we use code from Eric Jang's TensorFlow implementation. The source code of that repository 9 | has been modified for our needs and included under `e2c/tf_e2c`. 10 | 11 | ## What's included? 12 | * E2C model, plus VAE and AE baselines, with configurable network architectures for the different setups (see the appendix of the paper).
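For example, generating and loading the pendulum dataset (a minimal sketch mirroring `data/sample_pendulum_data.py`; it assumes the patched Gym mentioned above is installed):

```python
import numpy as np
from e2c.datasets import GymPendulumDatasetV2

np.random.seed(0)

# Render 10000 (x_t, u_t, x_{t+1}) samples to disk, then load them back.
GymPendulumDatasetV2.sample(10000, 'data/pendulum_markov')
dataset = GymPendulumDatasetV2('data/pendulum_markov')
```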
13 | 14 | 15 | ## TODO 16 | * Documentation, tests... (Soon to follow) 17 | -------------------------------------------------------------------------------- /data/sample_pendulum_data.py: -------------------------------------------------------------------------------- 1 | from e2c.datasets import GymPendulumDatasetV2 2 | import numpy as np 3 | 4 | np.random.seed(0) 5 | 6 | GymPendulumDatasetV2.sample(10000, 'data/pendulum_markov') 7 | dataset = GymPendulumDatasetV2('data/pendulum_markov') 8 | -------------------------------------------------------------------------------- /e2c/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/__init__.py -------------------------------------------------------------------------------- /e2c/ae.py: -------------------------------------------------------------------------------- 1 | """ 2 | Autoencoder baseline 3 | """ 4 | 5 | import torch 6 | from torch import nn 7 | from .losses import kl_bernoulli 8 | 9 | 10 | class AE(nn.Module): 11 | def __init__(self, dim_in, dim_z, config='pendulum'): 12 | super(AE, self).__init__() 13 | _, _, dec = load_config(config) 14 | 15 | # TODO: refactor encoder to allow output of dim_z instead of dim_z * 2 16 | self.encoder = nn.Sequential( 17 | nn.Linear(dim_in, 800), 18 | nn.BatchNorm1d(800), 19 | nn.ReLU(), 20 | nn.Linear(800, 800), 21 | nn.BatchNorm1d(800), 22 | nn.ReLU(), 23 | nn.Linear(800, dim_z), 24 | nn.BatchNorm1d(dim_z), 25 | nn.Sigmoid() 26 | ) 27 | 28 | self.decoder = dec(dim_z, dim_in) 29 | 30 | def forward(self, x): 31 | self.z = self.encoder(x) 32 | return self.decoder(self.z) 33 | 34 | 35 | def compute_loss(x_pred, x_true, z_pred, z_true, beta=0.05): 36 | mse = nn.MSELoss() 37 | return mse(x_pred, x_true).add(beta * kl_bernoulli(z_pred, z_true)) 38 | 39 | from .configs import load_config # deferred import; resolved by the time AE is instantiated 40 | -------------------------------------------------------------------------------- /e2c/configs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration for the encoder, decoder, transition 3 | for different tasks. Use load_config to find the proper 4 | set of configurations.
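For example, this is how e2c.E2C wires up a model from a configuration (the
dimension arguments are the caller's choice):

    enc, trans, dec = load_config('pendulum')
    encoder = enc(dim_in, dim_z)
    transition = trans(dim_z, dim_u)
    decoder = dec(dim_z, dim_in)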
5 | """ 6 | import torch 7 | from torch import nn 8 | from torch.autograd import Variable 9 | 10 | 11 | class Encoder(nn.Module): 12 | def __init__(self, enc, dim_in, dim_out): 13 | super(Encoder, self).__init__() 14 | self.m = enc 15 | self.dim_int = dim_in 16 | self.dim_out = dim_out 17 | 18 | def forward(self, x): 19 | return self.m(x).chunk(2, dim=1) 20 | 21 | 22 | class Decoder(nn.Module): 23 | def __init__(self, dec, dim_in, dim_out): 24 | super(Decoder, self).__init__() 25 | self.m = dec 26 | self.dim_in = dim_in 27 | self.dim_out = dim_out 28 | 29 | def forward(self, z): 30 | return self.m(z) 31 | 32 | 33 | class Transition(nn.Module): 34 | def __init__(self, trans, dim_z, dim_u): 35 | super(Transition, self).__init__() 36 | self.trans = trans 37 | self.dim_z = dim_z 38 | self.dim_u = dim_u 39 | 40 | self.fc_B = nn.Linear(dim_z, dim_z * dim_u) 41 | self.fc_o = nn.Linear(dim_z, dim_z) 42 | 43 | def forward(self, h, Q, u): 44 | batch_size = h.size()[0] 45 | v, r = self.trans(h).chunk(2, dim=1) 46 | v1 = v.unsqueeze(2) 47 | rT = r.unsqueeze(1) 48 | I = Variable(torch.eye(self.dim_z).repeat(batch_size, 1, 1)) 49 | if rT.data.is_cuda: 50 | I.dada.cuda() 51 | A = I.add(v1.bmm(rT)) 52 | 53 | B = self.fc_B(h).view(-1, self.dim_z, self.dim_u) 54 | o = self.fc_o(h) 55 | 56 | # need to compute the parameters for distributions 57 | # as well as for the samples 58 | u = u.unsqueeze(2) 59 | 60 | d = A.bmm(Q.mu.unsqueeze(2)).add(B.bmm(u)).add(o).squeeze(2) 61 | sample = A.bmm(h.unsqueeze(2)).add(B.bmm(u)).add(o).squeeze(2) 62 | 63 | return sample, NormalDistribution(d, Q.sigma, Q.logsigma, v=v, r=r) 64 | 65 | 66 | class PlaneEncoder(Encoder): 67 | def __init__(self, dim_in, dim_out): 68 | m = nn.Sequential( 69 | nn.Linear(dim_in, 150), 70 | nn.BatchNorm1d(150), 71 | nn.ReLU(), 72 | nn.Linear(150, 150), 73 | nn.BatchNorm1d(150), 74 | nn.ReLU(), 75 | nn.Linear(150, 150), 76 | nn.BatchNorm1d(150), 77 | nn.ReLU(), 78 | nn.Linear(150, dim_out*2) 79 | ) 80 | super(PlaneEncoder, self).__init__(m, dim_in, dim_out) 81 | 82 | 83 | class PlaneDecoder(Decoder): 84 | def __init__(self, dim_in, dim_out): 85 | m = nn.Sequential( 86 | nn.Linear(dim_in, 200), 87 | nn.BatchNorm1d(200), 88 | nn.ReLU(), 89 | nn.Linear(200, 200), 90 | nn.BatchNorm1d(200), 91 | nn.ReLU(), 92 | nn.Linear(200, dim_out), 93 | nn.BatchNorm1d(dim_out), 94 | nn.Sigmoid() 95 | ) 96 | super(PlaneDecoder, self).__init__(m, dim_in, dim_out) 97 | 98 | 99 | class PlaneTransition(Transition): 100 | def __init__(self, dim_z, dim_u): 101 | trans = nn.Sequential( 102 | nn.Linear(dim_z, 100), 103 | nn.BatchNorm1d(100), 104 | nn.ReLU(), 105 | nn.Linear(100, 100), 106 | nn.BatchNorm1d(100), 107 | nn.ReLU(), 108 | nn.Linear(100, dim_z*2) 109 | ) 110 | super(PlaneTransition, self).__init__(trans, dim_z, dim_u) 111 | 112 | 113 | class PendulumEncoder(Encoder): 114 | def __init__(self, dim_in, dim_out): 115 | m = nn.ModuleList([ 116 | torch.nn.Linear(dim_in, 800), 117 | nn.BatchNorm1d(800), 118 | nn.ReLU(), 119 | torch.nn.Linear(800, 800), 120 | nn.BatchNorm1d(800), 121 | nn.ReLU(), 122 | nn.Linear(800, 2 * dim_out) 123 | ]) 124 | super(PendulumEncoder, self).__init__(m, dim_in, dim_out) 125 | 126 | def forward(self, x): 127 | for l in self.m: 128 | x = l(x) 129 | return x.chunk(2, dim=1) 130 | 131 | 132 | class PendulumDecoder(Decoder): 133 | def __init__(self, dim_in, dim_out): 134 | m = nn.ModuleList([ 135 | torch.nn.Linear(dim_in, 800), 136 | nn.BatchNorm1d(800), 137 | nn.ReLU(), 138 | torch.nn.Linear(800, 800), 139 | nn.BatchNorm1d(800), 140 | 
nn.ReLU(), 141 | nn.Linear(800, dim_out), 142 | nn.Sigmoid() 143 | ]) 144 | super(PendulumDecoder, self).__init__(m, dim_in, dim_out) 145 | 146 | def forward(self, z): 147 | for l in self.m: 148 | z = l(z) 149 | return z 150 | 151 | 152 | class PendulumTransition(Transition): 153 | def __init__(self, dim_z, dim_u): 154 | trans = nn.Sequential( 155 | nn.Linear(dim_z, 100), 156 | nn.BatchNorm1d(100), 157 | nn.ReLU(), 158 | nn.Linear(100, 100), 159 | nn.BatchNorm1d(100), 160 | nn.ReLU(), 161 | nn.Linear(100, dim_z * 2), 162 | nn.BatchNorm1d(dim_z * 2), 163 | nn.Sigmoid() # Added to prevent nan 164 | ) 165 | super(PendulumTransition, self).__init__(trans, dim_z, dim_u) 166 | 167 | 168 | _CONFIG_MAP = { 169 | 'plane': (PlaneEncoder, PlaneTransition, PlaneDecoder), 170 | 'pendulum': (PendulumEncoder, PendulumTransition, PendulumDecoder) 171 | } 172 | 173 | 174 | def load_config(name): 175 | """Load a particular configuration. 176 | Returns: 177 | (encoder, transition, decoder): a tuple containing class constructors 178 | """ 179 | if name not in _CONFIG_MAP.keys(): 180 | raise ValueError("Unknown config: %s" % name) 181 | return _CONFIG_MAP[name] 182 | 183 | from .e2c import NormalDistribution # deferred to avoid a circular import with .e2c 184 | 185 | __all__ = ['load_config'] 186 | -------------------------------------------------------------------------------- /e2c/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from os import path 4 | 5 | from PIL import Image 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import gym 10 | import json 11 | from datetime import datetime 12 | from torchvision.transforms import ToTensor 13 | from torch.utils.data import Dataset 14 | from .tf_e2c.plane_data2 import T, num_t 15 | from skimage.transform import resize 16 | from skimage.color import rgb2gray 17 | from tqdm import trange, tqdm 18 | import pickle 19 | 20 | 21 | class PendulumData(Dataset): 22 | def __init__(self, root, split): 23 | if split not in ['train', 'test', 'all']: 24 | raise ValueError 25 | 26 | dir = os.path.join(root, split) 27 | filenames = glob.glob(os.path.join(dir, '*.png')) 28 | 29 | if split == 'all': 30 | filenames = glob.glob(os.path.join(root, 'train/*.png')) 31 | filenames.extend(glob.glob(os.path.join(root, 'test/*.png'))) 32 | 33 | filenames = sorted( 34 | filenames, key=lambda x: int(os.path.basename(x).split('.')[0])) 35 | 36 | images = [] 37 | 38 | for f in filenames: 39 | img = plt.imread(f) 40 | img[img != 1] = 0 41 | images.append(resize(rgb2gray(img), [48, 48], mode='constant')) 42 | 43 | self.images = np.array(images, dtype=np.float32) 44 | self.images = self.images.reshape([len(images), 48, 48, 1]) 45 | 46 | action_filename = os.path.join(root, 'actions.txt') 47 | 48 | with open(action_filename) as infile: 49 | actions = np.array([float(l) for l in infile.readlines()]) 50 | 51 | self.actions = actions[:len(self.images)].astype(np.float32) 52 | self.actions = self.actions.reshape(len(actions), 1) 53 | 54 | def __len__(self): 55 | return len(self.actions) - 1 56 | 57 | def __getitem__(self, index): 58 | return self.images[index], self.actions[index], self.images[index] 59 | 60 | 61 | class PlaneDataset(Dataset): 62 | def __init__(self, planedata): 63 | self.planedata = planedata 64 | 65 | def __len__(self): 66 | return T * num_t # Total number of samples 67 | 68 | def __getitem__(self, index): 69 | index = np.random.randint(0, num_t) # Sample any one of them 70 | t = np.random.randint(0, T - 1) 71 | x = np.array(self.planedata.getX(index, t)) 72 | x_next = np.array(self.planedata.getX(index, t + 1)) 73 | u = np.copy(self.planedata.U[index, t, :]) 74 | return x, u, x_next 75 | 76 |
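# The datasets in this module yield (x, u, x_next) triples from __getitem__,
# so they plug straight into a PyTorch DataLoader. A minimal sketch (the
# `planedata` object comes from e2c/tf_e2c/plane_data2.py; constructing it
# here is an assumption, not part of this file):
#
#   from torch.utils.data import DataLoader
#   loader = DataLoader(PlaneDataset(planedata), batch_size=128, shuffle=True)
#   for x, u, x_next in loader:
#       ...  # one training batch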
77 | class GymPendulumDataset(Dataset): 78 | """Dataset definition for the Gym Pendulum task""" 79 | width = 40 80 | height = 40 81 | action_dim = 1 82 | # Sampling uses the OpenAI Gym environment; requires a patched version of gym. 83 | 84 | def __init__(self, filename): 85 | _data = np.load(filename) 86 | self.X0 = np.copy(_data['X0']) # Copy to memory, otherwise it's slow. 87 | self.X1 = np.copy(_data['X1']) 88 | self.U = np.copy(_data['U']) 89 | _data.close() 90 | 91 | def __len__(self): 92 | return len(self.X0) 93 | 94 | def __getitem__(self, index): 95 | return self.X0[index], self.U[index], self.X1[index] 96 | 97 | @classmethod 98 | def all_states(cls): 99 | _env = gym.make('Pendulum-v0').env 100 | width = GymPendulumDataset.width 101 | height = GymPendulumDataset.height 102 | X = np.zeros((360, width, height)) 103 | 104 | for i in range(360): 105 | th = i / 360. * 2 * np.pi 106 | state = _env.render_state(th) 107 | X[i, :, :] = resize(rgb2gray(state), (width, height), mode='reflect') 108 | _env.close() 109 | _env.viewer.close() 110 | return X 111 | 112 | @classmethod 113 | def sample_trajectories(cls, sample_size, step_size=1, apply_control=True): 114 | _env = gym.make('Pendulum-v0').env 115 | X0 = np.zeros((sample_size, 500, 500, 3), dtype=np.uint8) 116 | U = np.zeros((sample_size, 1), dtype=np.float32) 117 | X1 = np.zeros((sample_size, 500, 500, 3), dtype=np.uint8) 118 | for i in range(sample_size): 119 | th = np.random.uniform(0, np.pi * 2) 120 | # thdot = np.random.uniform(-8, 8) 121 | thdot = 0 122 | state = np.array([th, thdot]) 123 | initial = state 124 | # apply the same control over a few timesteps 125 | if apply_control: 126 | u = np.random.uniform(-2, 2, size=(1,)) 127 | else: 128 | u = np.zeros((1,)) 129 | for _ in range(step_size): 130 | state = _env.step_from_state(state, u) 131 | 132 | X0[i, :, :, :] = _env.render_state(initial[0]) 133 | U[i, :] = u 134 | X1[i, :, :, :] = _env.render_state(state[0]) 135 | _env.viewer.close() 136 | return X0, U, X1 137 | 138 | 139 | class GymPendulumDatasetV2(Dataset): 140 | width = 40 * 2 141 | height = 40 142 | action_dim = 1 143 | 144 | def __init__(self, dir): 145 | self.dir = dir 146 | with open(path.join(dir, 'data.json')) as f: 147 | self._data = json.load(f) 148 | self._process() 149 | 150 | def __len__(self): 151 | return len(self._data['samples']) 152 | 153 | def __getitem__(self, index): 154 | return self._processed[index] 155 | 156 | @staticmethod 157 | def _process_image(img): 158 | return ToTensor()((img.convert('L').
159 | resize((GymPendulumDatasetV2.width, 160 | GymPendulumDatasetV2.height)))) 161 | 162 | def _process(self): 163 | preprocessed_file = os.path.join(self.dir, 'processed.pkl') 164 | if not os.path.exists(preprocessed_file): 165 | processed = [] 166 | for sample in tqdm(self._data['samples'], desc='processing data'): 167 | before = Image.open(os.path.join(self.dir, sample['before'])) 168 | after = Image.open(os.path.join(self.dir, sample['after'])) 169 | 170 | processed.append((self._process_image(before), 171 | np.array(sample['control']), 172 | self._process_image(after))) 173 | 174 | with open(preprocessed_file, 'wb') as f: 175 | pickle.dump(processed, f) 176 | self._processed = processed 177 | else: 178 | with open(preprocessed_file, 'rb') as f: 179 | self._processed = pickle.load(f) 180 | 181 | @staticmethod 182 | def _render_state_fully_observed(env, state): 183 | before1 = state 184 | before2 = env.step_from_state(state, np.array([0])) 185 | return map(env.render_state, [before1[0], before2[0]]) 186 | 187 | @classmethod 188 | def sample(cls, sample_size, output_dir, step_size=1, 189 | apply_control=True, num_shards=10): 190 | env = gym.make('Pendulum-v0').env 191 | assert sample_size % num_shards == 0 192 | 193 | samples = [] 194 | 195 | if not path.exists(output_dir): 196 | os.makedirs(output_dir) 197 | 198 | for i in trange(sample_size): 199 | th = np.random.uniform(0, np.pi * 2) 200 | thdot = np.random.uniform(-8, 8) 201 | 202 | state = np.array([th, thdot]) 203 | u0 = np.array([0]) 204 | 205 | initial_state = state 206 | before1, before2 = GymPendulumDatasetV2._render_state_fully_observed(env, state) 207 | 208 | # apply the same control over a few timesteps 209 | if apply_control: 210 | u = np.random.uniform(-2, 2, size=(1,)) 211 | else: 212 | u = np.zeros((1,)) 213 | 214 | # state = env.step_from_state(state, u0) 215 | for _ in range(step_size): 216 | state = env.step_from_state(state, u) 217 | 218 | after_state = state 219 | after1, after2 = GymPendulumDatasetV2._render_state_fully_observed(env, state) 220 | 221 | before = np.hstack((before1, before2)) 222 | after = np.hstack((after1, after2)) 223 | 224 | shard_no = i // (sample_size // num_shards) 225 | 226 | shard_path = path.join('{:03d}-of-{:03d}'.format(shard_no, num_shards)) 227 | 228 | if not path.exists(path.join(output_dir, shard_path)): 229 | os.makedirs(path.join(output_dir, shard_path)) 230 | 231 | before_file = path.join(shard_path, 'before-{:05d}.jpg'.format(i)) 232 | plt.imsave(path.join(output_dir, before_file), before) 233 | 234 | after_file = path.join(shard_path, 'after-{:05d}.jpg'.format(i)) 235 | plt.imsave(path.join(output_dir, after_file), after) 236 | 237 | samples.append({ 238 | 'before_state': initial_state.tolist(), 239 | 'after_state': after_state.tolist(), 240 | 'before': before_file, 241 | 'after': after_file, 242 | 'control': u.tolist(), 243 | }) 244 | 245 | with open(path.join(output_dir, 'data.json'), 'wt') as outfile: 246 | json.dump( 247 | { 248 | 'metadata': { 249 | 'num_samples': sample_size, 250 | 'step_size': step_size, 251 | 'apply_control': apply_control, 252 | 'time_created': str(datetime.now()), 253 | 'version': 1 254 | }, 255 | 'samples': samples 256 | }, outfile, indent=2) 257 | 258 | env.viewer.close() 259 | -------------------------------------------------------------------------------- /e2c/e2c.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Variable 4 | 5 | from .losses 
import binary_crossentropy 6 | 7 | 8 | class NormalDistribution(object): 9 | """ 10 | Wrapper class representing a multivariate normal distribution parameterized by 11 | N(mu,Cov). If cov. matrix is diagonal, Cov=(sigma).^2. Otherwise, 12 | Cov=A*(sigma).^2*A', where A = (I+v*r^T). 13 | """ 14 | 15 | def __init__(self, mu, sigma, logsigma, *, v=None, r=None): 16 | self.mu = mu 17 | self.sigma = sigma 18 | self.logsigma = logsigma 19 | self.v = v 20 | self.r = r 21 | 22 | @property 23 | def cov(self): 24 | """This should only be called when NormalDistribution represents one sample""" 25 | if self.v is not None and self.r is not None: 26 | assert self.v.dim() == 1 27 | dim = self.v.size(0) # D, the dimensionality of the distribution 28 | v = self.v.unsqueeze(1) # D * 1 vector 29 | rt = self.r.unsqueeze(0) # 1 * D vector 30 | A = torch.eye(dim) + v.mm(rt) 31 | return A.mm(torch.diag(self.sigma.pow(2)).mm(A.t())) 32 | else: 33 | return torch.diag(self.sigma.pow(2)) 34 | 35 | 36 | def KLDGaussian(Q, N, eps=1e-8): 37 | r"""KL Divergence between two Gaussians 38 | Assuming Q ~ N(mu0, A\sigma_0A') where A = I + vr^{T} 39 | and N ~ N(mu1, \sigma_1) 40 | """ 41 | sum = lambda x: torch.sum(x, dim=1) 42 | k = float(Q.mu.size()[1]) # dimension of distribution 43 | mu0, v, r, mu1 = Q.mu, Q.v, Q.r, N.mu 44 | s02, s12 = (Q.sigma).pow(2) + eps, (N.sigma).pow(2) + eps 45 | a = sum(s02 * (1. + 2. * v * r) / s12) + sum(v.pow(2) / s12) * sum(r.pow(2) * s02) # trace term 46 | b = sum((mu1 - mu0).pow(2) / s12) # difference-of-means term 47 | c = 2. * (sum(N.logsigma - Q.logsigma) - torch.log(1. + sum(v * r) + eps)) # ratio-of-determinants term. 48 | 49 | # 50 | # print('trace: %s' % a) 51 | # print('mu_diff: %s' % b) 52 | # print('k: %s' % k) 53 | # print('det: %s' % c) 54 | 55 | return 0.5 * (a + b - k + c) 56 | 57 | 58 | class E2C(nn.Module): 59 | def __init__(self, dim_in, dim_z, dim_u, config='pendulum'): 60 | super(E2C, self).__init__() 61 | enc, trans, dec = load_config(config) 62 | self.encoder = enc(dim_in, dim_z) 63 | 64 | self.decoder = dec(dim_z, dim_in) 65 | self.trans = trans(dim_z, dim_u) 66 | 67 | def encode(self, x): 68 | return self.encoder(x) 69 | 70 | def decode(self, z): 71 | return self.decoder(z) 72 | 73 | def transition(self, z, Qz, u): 74 | return self.trans(z, Qz, u) 75 | 76 | def reparam(self, mean, logvar): 77 | std = logvar.mul(0.5).exp_() 78 | self.z_mean = mean 79 | self.z_sigma = std 80 | eps = torch.FloatTensor(std.size()).normal_() 81 | if std.data.is_cuda: 82 | eps = eps.cuda() 83 | eps = Variable(eps) 84 | return eps.mul(std).add_(mean), NormalDistribution(mean, std, torch.log(std)) 85 | 86 | def forward(self, x, action, x_next): 87 | mean, logvar = self.encode(x) 88 | mean_next, logvar_next = self.encode(x_next) 89 | 90 | z, self.Qz = self.reparam(mean, logvar) 91 | z_next, self.Qz_next = self.reparam(mean_next, logvar_next) 92 | 93 | self.x_dec = self.decode(z) 94 | self.x_next_dec = self.decode(z_next) 95 | 96 | self.z_next_pred, self.Qz_next_pred = self.transition(z, self.Qz, action) 97 | self.x_next_pred_dec = self.decode(self.z_next_pred) 98 | 99 | return self.x_next_pred_dec 100 | 101 | def latent_embeddings(self, x): 102 | return self.encode(x)[0] 103 | 104 | def predict(self, X, U): 105 | mean, logvar = self.encode(X) 106 | z, Qz = self.reparam(mean, logvar) 107 | z_next_pred, Qz_next_pred = self.transition(z, Qz, U) 108 | return self.decode(z_next_pred) 109 | 110 |
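# How the pieces fit together in one training step (a sketch, not code from
# this repo; the optimizer, the data batches and the lambd weight are
# assumptions here):
#
#   model = E2C(dim_in, dim_z, dim_u, config='pendulum')
#   x_next_pred = model(x, u, x_next)  # also populates model.x_dec, model.Qz, ...
#   bound, kl = compute_loss(model.x_dec, model.x_next_pred_dec, x, x_next,
#                            model.Qz, model.Qz_next_pred, model.Qz_next)
#   loss = bound + lambd * kl          # compute_loss is defined just below
#   loss.backward()
#   optimizer.step()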
111 | def compute_loss(x_dec, x_next_pred_dec, x, x_next, 112 | Qz, Qz_next_pred, 113 | Qz_next): 114 | # Reconstruction losses 115 | if False: # disabled MSE alternative to the cross-entropy reconstruction below 116 | x_reconst_loss = (x_dec - x).pow(2).sum(dim=1) 117 | x_next_reconst_loss = (x_next_pred_dec - x_next).pow(2).sum(dim=1) 118 | else: 119 | x_reconst_loss = -binary_crossentropy(x, x_dec).sum(dim=1) 120 | x_next_reconst_loss = -binary_crossentropy(x_next, x_next_pred_dec).sum(dim=1) 121 | 122 | logvar = Qz.logsigma.mul(2) 123 | KLD_element = Qz.mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar) 124 | KLD = torch.sum(KLD_element, dim=1).mul(-0.5) 125 | 126 | # ELBO 127 | bound_loss = x_reconst_loss.add(x_next_reconst_loss).add(KLD) 128 | kl = KLDGaussian(Qz_next_pred, Qz_next) 129 | return bound_loss.mean(), kl.mean() 130 | 131 | from .configs import load_config # deferred to avoid a circular import with .configs 132 | -------------------------------------------------------------------------------- /e2c/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def binary_crossentropy(t, o, eps=1e-8): 5 | return t * torch.log(o + eps) + (1.0 - t) * torch.log(1.0 - o + eps) 6 | 7 | 8 | def kl_bernoulli(p, q, eps=1e-8): 9 | # http://ufldl.stanford.edu/tutorial/unsupervised/Autoencoders/ 10 | kl = p * torch.log((p + eps) / (q + eps)) + \ 11 | (1 - p) * torch.log((1 - p + eps) / (1 - q + eps)) 12 | return kl.mean() 13 | -------------------------------------------------------------------------------- /e2c/tf_e2c/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship.
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {2017} {Eric Jang} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /e2c/tf_e2c/README.md: -------------------------------------------------------------------------------- 1 | # e2c 2 | 3 | TensorFlow implementation of: [Embed to Control: A Locally Linear Latent Dynamics Model for Control from Raw Images](http://arxiv.org/abs/1506.07365), with code optimized for clarity and simplicity. 4 | 5 | ![latent](http://i.imgur.com/zO5G3K0.png) 6 | 7 | Only 160 lines of code, and it uses only Python modules that come installed with TensorFlow.
A proper writeup explaining the paper, plus improved model code, will follow soon. 8 | 9 | ## Results 10 | 11 | The left column shows x_t and x_{t+1}; the right column shows the E2C reconstructions. 12 | ![reconstruction](https://1.bp.blogspot.com/-L2qTQr8XZMY/Vv3cgLAklqI/AAAAAAAAE8g/rjMk2Z98XxEalKyXvtZUGeHtArdsD2vBg/s640/figure_1.png) 13 | 14 | Larger step sizes (magnitude of u) yield better latent space reconstruction... 15 | 16 | ![unfolding latent space](http://i.imgur.com/DF6Gd96.gif) 17 | 18 | but degrade image reconstruction fidelity (more on this later...). Here's a different set of obstacles: 19 | 20 | ![poor reconstruction](http://i.imgur.com/cl9RjlR.png) 21 | 22 | ## Features: 23 | - Implements the standard E2C model with the factorized Gaussian KL divergence term (Eq. 14) 24 | - Adam Optimizer + Orthogonal weight initialization scheme by [Saxe et al.](http://arxiv.org/abs/1312.6120). 25 | - Learns the latent space of the planar control task (uses the same parameters described in the paper, Appendix B.6.2) 26 | 27 | ## Training the Model 28 | 29 | First, generate the synthetic training data `plane2.npz` by running the following script: 30 | 31 | ```bash 32 | $ python plane_data2.py 33 | ``` 34 | 35 | Then, train the model: 36 | ```bash 37 | $ python e2c_plane.py 38 | ``` 39 | 40 | You can then generate visualizations by executing: 41 | 42 | ```bash 43 | $ python viz_results.py 44 | ``` 45 | 46 | ## Acknowledgements 47 | 48 | Thanks to Manuel Watter for answering my questions about the paper. 49 | 50 | ## License 51 | 52 | Apache 2.0 53 | -------------------------------------------------------------------------------- /e2c/tf_e2c/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__init__.py -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/dataset.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/dataset.cpython-35.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/plane_data2.cpython-35.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/plane_data2.cpython-35.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/__pycache__/plane_data2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/__pycache__/plane_data2.cpython-36.pyc -------------------------------------------------------------------------------- /e2c/tf_e2c/dataset.py: -------------------------------------------------------------------------------- 1 | # abstract helper class customized for training E2C tasks 2 | 3 | 4 | class DataSet(object): 5 | """docstring for DataSet""" 6 | 7 | def __init__(self): 8 | super(DataSet, self).__init__() 9 | -------------------------------------------------------------------------------- /e2c/tf_e2c/e2c_plane.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Implementation of Embed-to-Control model: http://arxiv.org/abs/1506.07365 4 | Code is organized for simplicity and readability w.r.t paper. 5 | 6 | Author: Eric Jang 7 | """ 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | import matplotlib.pyplot as plt 12 | import os 13 | from data.plane_data2 import PlaneData, get_params 14 | 15 | import ipdb as pdb 16 | # np.random.seed(0) 17 | tf.set_random_seed(0) 18 | 19 | A = B = 40 20 | 21 | x_dim, u_dim, T = get_params() 22 | z_dim = 2 # latent space dimensionality 23 | eps = 1e-9 # numerical stability 24 | 25 | 26 | def orthogonal_initializer(scale=1.1): 27 | """ 28 | From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120 29 | """ 30 | 31 | def _initializer(shape, dtype=tf.float32): 32 | flat_shape = (shape[0], np.prod(shape[1:])) 33 | a = np.random.normal(0.0, 1.0, flat_shape) 34 | u, _, v = np.linalg.svd(a, full_matrices=False) 35 | # pick the one with the correct shape 36 | q = u if u.shape == flat_shape else v 37 | q = q.reshape(shape) 38 | print( 39 | 'Warning -- You have opted to use the orthogonal_initializer function' 40 | ) 41 | return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32) 42 | 43 | return _initializer 44 | 45 | 46 | class NormalDistribution(object): 47 | """ 48 | Represents a multivariate normal distribution parameterized by 49 | N(mu,Cov). If cov. matrix is diagonal, Cov=(sigma).^2. Otherwise, 50 | Cov=A*(sigma).^2*A', where A = (I+v*r^T). 
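    Equivalently, the full covariance is (I + v*r^T) * diag(sigma^2) * (I + v*r^T)^T,
    so the default v = r = 0 set in __init__ recovers the diagonal case.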
51 | """ 52 | 53 | def __init__(self, mu, sigma, logsigma, v=None, r=None): 54 | self.mu = mu 55 | self.sigma = sigma # either stdev diagonal itself, or stdev diagonal from decomposition 56 | self.logsigma = logsigma 57 | dim = mu.get_shape() 58 | if v is None: 59 | v = tf.constant(0., shape=dim) 60 | if r is None: 61 | r = tf.constant(0., shape=dim) 62 | self.v = v 63 | self.r = r 64 | 65 | 66 | def linear(x, output_dim): 67 | w = tf.get_variable( 68 | "w", [x.get_shape()[1], output_dim], 69 | initializer=orthogonal_initializer(1.1)) 70 | b = tf.get_variable( 71 | "b", [output_dim], initializer=tf.constant_initializer(0.0)) 72 | return tf.matmul(x, w) + b 73 | 74 | 75 | def ReLU(x, output_dim, scope): 76 | # helper function for implementing stacked ReLU layers 77 | with tf.variable_scope(scope): 78 | return tf.nn.relu(linear(x, output_dim)) 79 | 80 | 81 | def encode(x, share=None): 82 | with tf.variable_scope("encoder", reuse=share): 83 | for l in range(3): 84 | x = ReLU(x, 150, "aggregate_loss" + str(l)) 85 | return linear(x, 2 * z_dim) 86 | 87 | 88 | def KLGaussian(Q, N): 89 | # Q, N are instances of NormalDistribution 90 | # implements KL Divergence term KL(N0,N1) derived in Appendix A.1 91 | # Q ~ Normal(mu,A*sigma*A^T), N ~ Normal(mu,sigma_1) 92 | # returns scalar divergence, measured in nats (information units under log rather than log2), shape= batch x 1 93 | sum = lambda x: tf.reduce_sum(x, 1) # convenience fn for summing over features (columns) 94 | k = float(Q.mu.get_shape()[1].value) # dimension of distribution 95 | mu0, v, r, mu1 = Q.mu, Q.v, Q.r, N.mu 96 | s02, s12 = tf.square(Q.sigma), tf.square(N.sigma) + eps 97 | #vr=sum(v*r) 98 | a = sum(s02 * (1. + 2. * v * r) / s12) + sum(tf.square(v) / s12) * sum( 99 | tf.square(r) * s02) # trace term 100 | b = sum(tf.square(mu1 - mu0) / s12) # difference-of-means term 101 | c = 2. * (sum(N.logsigma - Q.logsigma) - tf.log(1. + sum(v * r)) 102 | ) # ratio-of-determinants term. 103 | return 0.5 * (a + b - k + c) #, a, b, c 104 | 105 | 106 | def sampleNormal(mu, sigma): 107 | # diagonal stdev 108 | n01 = tf.random_normal(sigma.get_shape(), mean=0, stddev=1) 109 | return mu + sigma * n01 110 | 111 | 112 | def sampleQ_phi(h_enc, share=None): 113 | with tf.variable_scope("sampleQ_phi", reuse=share): 114 | mu, log_sigma = tf.split(1, 2, linear( 115 | h_enc, z_dim * 2)) # diagonal stdev values 116 | sigma = tf.exp(log_sigma) 117 | return sampleNormal(mu, sigma), NormalDistribution( 118 | mu, sigma, log_sigma) 119 | 120 | 121 | def transition(h): 122 | # compute A,B,o linearization matrices 123 | with tf.variable_scope("trans"): 124 | for l in range(2): 125 | h = ReLU(h, 100, "aggregate_loss" + str(l)) 126 | with tf.variable_scope("A"): 127 | v, r = tf.split(1, 2, linear(h, z_dim * 2)) 128 | v1 = tf.expand_dims(v, -1) # (batch, z_dim, 1) 129 | rT = tf.expand_dims(r, 1) # batch, 1, z_dim 130 | I = tf.diag([1.] 
* z_dim) 131 | A = ( 132 | I + tf.batch_matmul(v1, rT) 133 | ) # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted) 134 | with tf.variable_scope("B"): 135 | B = linear(h, z_dim * u_dim) 136 | B = tf.reshape(B, [-1, z_dim, u_dim]) 137 | with tf.variable_scope("o"): 138 | o = linear(h, z_dim) 139 | return A, B, o, v, r 140 | 141 | 142 | def sampleQ_psi(z, u, Q_phi): 143 | A, B, o, v, r = transition(z) 144 | with tf.variable_scope("sampleQ_psi"): 145 | mu_t = tf.expand_dims(Q_phi.mu, -1) # batch,z_dim,1 146 | Amu = tf.squeeze(tf.batch_matmul(A, mu_t), [-1]) 147 | u = tf.expand_dims(u, -1) # batch,u_dim,1 148 | Bu = tf.squeeze(tf.batch_matmul(B, u), [-1]) 149 | Q_psi = NormalDistribution(Amu + Bu + o, Q_phi.sigma, Q_phi.logsigma, 150 | v, r) 151 | # the actual z_next sample is generated by deterministically transforming z_t 152 | z = tf.expand_dims(z, -1) 153 | Az = tf.squeeze(tf.batch_matmul(A, z), [-1]) 154 | z_next = Az + Bu + o 155 | return z_next, Q_psi #,(A,B,o,v,r) # debugging 156 | 157 | 158 | def decode(z, share=None): 159 | with tf.variable_scope("decoder", reuse=share): 160 | for l in range(2): 161 | z = ReLU(z, 200, "aggregate_loss" + str(l)) 162 | return linear(z, x_dim) 163 | 164 | 165 | def binary_crossentropy(t, o): 166 | return t * tf.log(o + eps) + (1.0 - t) * tf.log(1.0 - o + eps) 167 | 168 | 169 | def recons_loss(x, x_recons): 170 | with tf.variable_scope("Lx"): 171 | ll = tf.reduce_sum(binary_crossentropy(x, x_recons), 172 | 1) # sum across features 173 | return -ll # negative log-likelihood 174 | 175 | 176 | def latent_loss(Q): 177 | with tf.variable_scope("Lz"): 178 | mu2 = tf.square(Q.mu) 179 | sigma2 = tf.square(Q.sigma) 180 | # negative of the upper bound of posterior 181 | return -0.5 * tf.reduce_sum(1 + 2 * Q.logsigma - mu2 - sigma2, 1) 182 | 183 | 184 | def sampleP_theta(h_dec, share=None): 185 | # sample x from bernoulli distribution with means p=W(h_dec) 186 | with tf.variable_scope("P_theta", reuse=share): 187 | p = linear(h_dec, x_dim) 188 | return tf.sigmoid(p) # mean of bernoulli distribution 189 | 190 | 191 | # BUILD NETWORK 192 | batch_size = 128 193 | 194 | x = tf.placeholder(tf.float32, [batch_size, x_dim]) 195 | u = tf.placeholder(tf.float32, [batch_size, u_dim]) # control at time t 196 | x_next = tf.placeholder(tf.float32, [batch_size, 197 | x_dim]) # observation at time t+1 198 | 199 | # encode x_t 200 | h_enc = encode(x) 201 | z, Q_phi = sampleQ_phi(h_enc) 202 | # reconstitute x_t 203 | h_dec = decode(z) 204 | x_recons = sampleP_theta(h_dec) 205 | # compute linearized dynamics, predict new latent state 206 | z_predict, Q_psi = sampleQ_psi(z, u, Q_phi) 207 | # decode prediction 208 | h_dec_predict = decode(z_predict, share=True) 209 | x_predict = sampleP_theta(h_dec_predict, share=True) 210 | # encode next 211 | h_enc_next = encode(x_next, share=True) 212 | z_next, Q_phi_next = sampleQ_phi(h_enc_next, share=True) 213 | 214 | with tf.variable_scope("Loss"): 215 | L_x = recons_loss(x, x_recons) 216 | L_x_next = recons_loss(x_next, x_predict) 217 | L_z = latent_loss(Q_phi) 218 | L_bound = L_x + L_x_next + L_z 219 | KL = KLGaussian(Q_psi, Q_phi_next) 220 | lambd = 0.25 221 | loss = tf.reduce_mean( 222 | L_bound + lambd * KL) # average loss over minibatch to single scalar 223 | 224 | for v in tf.all_variables(): 225 | print("%s : %s" % (v.name, v.get_shape())) 226 | 227 | # pdb.set_trace() # debugging hook; uncomment to inspect variables before training 228 | 229 | with tf.variable_scope("Optimizer"): 230 | learning_rate = 1e-4 231 | optimizer = tf.train.AdamOptimizer( 232 | learning_rate, beta1=0.1,
beta2=0.1) # beta2=0.1 233 | train_op = optimizer.minimize(loss) 234 | 235 | saver = tf.train.Saver(max_to_keep=200) # keep up to 200 checkpoint files 236 | 237 | ckpt_file = "/ltmp/e2c-plane" 238 | 239 | # summaries 240 | tf.scalar_summary("loss", loss) 241 | tf.scalar_summary("L_x", tf.reduce_mean(L_x)) 242 | tf.scalar_summary("L_x_next", tf.reduce_mean(L_x_next)) 243 | tf.scalar_summary("L_z", tf.reduce_mean(L_z)) 244 | tf.scalar_summary("KL", tf.reduce_mean(KL)) 245 | all_summaries = tf.merge_all_summaries() 246 | 247 | # TRAIN 248 | if __name__ == "__main__": 249 | init = tf.initialize_all_variables() 250 | sess = tf.InteractiveSession() 251 | sess.run(init) 252 | # WRITER 253 | writer = tf.train.SummaryWriter("/ltmp/e2c", sess.graph_def) 254 | 255 | dataset = PlaneData("data/plane1.npz", "data/env1.png") 256 | dataset.initialize() 257 | 258 | # tmp 259 | # (x_val,u_val,x_next_val)=dataset.sample(batch_size, replace=False) 260 | # feed_dict={ 261 | # x:x_val, 262 | # u:u_val, 263 | # x_next:x_next_val 264 | # } 265 | # results=sess.run([L_x,L_x_next,L_z,L_bound,KL],feed_dict) 266 | # pdb.set_trace() 267 | # resume training 268 | #saver.restore(sess, "/ltmp/e2c-plane-83000.ckpt") 269 | train_iters = 2e5 # 200K iters 270 | for i in range(int(train_iters)): 271 | (x_val, u_val, x_next_val) = dataset.sample(batch_size, replace=False) 272 | feed_dict = {x: x_val, u: u_val, x_next: x_next_val} 273 | # plt.hist(x_val[0, :]) # uncomment to inspect a sample's pixel histogram (blocks the loop) 274 | # plt.show() 275 | results = sess.run([loss, all_summaries, train_op], feed_dict) 276 | if i % 1000 == 0: 277 | print("iter=%d : Loss: %f" % (i, results[0])) 278 | if i > 2000: 279 | writer.add_summary(results[1], i) 280 | if (i % 100 == 0 and i < 1000) or (i % 1000 == 0): 281 | saver.save(sess, ckpt_file + "-%05d" % (i) + ".ckpt") 282 | 283 | sess.close() 284 | -------------------------------------------------------------------------------- /e2c/tf_e2c/e2c_seq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | multi-step prediction using E2C 5 | results are kind of poor... 6 | """ 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | import matplotlib.pyplot as plt 11 | import os 12 | from data.plane_data2 import PlaneData, get_params 13 | 14 | A=B=40 15 | 16 | x_dim,u_dim,T=get_params() 17 | z_dim=2 # latent space dimensionality 18 | eps=1e-9 # numerical stability 19 | 20 | def orthogonal_initializer(scale = 1.1): 21 | """ 22 | From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120 23 | """ 24 | def _initializer(shape, dtype=tf.float32): 25 | flat_shape = (shape[0], np.prod(shape[1:])) 26 | a = np.random.normal(0.0, 1.0, flat_shape) 27 | u, _, v = np.linalg.svd(a, full_matrices=False) 28 | # pick the one with the correct shape 29 | q = u if u.shape == flat_shape else v 30 | q = q.reshape(shape) 31 | print('Warning -- You have opted to use the orthogonal_initializer function') 32 | return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32) 33 | return _initializer 34 | 35 | class NormalDistribution(object): 36 | """ 37 | Represents a multivariate normal distribution parameterized by 38 | N(mu,Cov). If cov. matrix is diagonal, Cov=(sigma).^2. Otherwise, 39 | Cov=A*(sigma).^2*A', where A = (I+v*r^T). 
40 | """ 41 | def __init__(self,mu,sigma,logsigma,v=None,r=None): 42 | self.mu=mu 43 | self.sigma=sigma # either stdev diagonal itself, or stdev diagonal from decomposition 44 | self.logsigma=logsigma 45 | dim=mu.get_shape() 46 | if v is None: 47 | v=tf.constant(0.,shape=dim) 48 | if r is None: 49 | r=tf.constant(0.,shape=dim) 50 | self.v=v 51 | self.r=r 52 | 53 | def linear(x,output_dim): 54 | w=tf.get_variable("w", [x.get_shape()[1], output_dim], initializer=orthogonal_initializer(1.1)) 55 | b=tf.get_variable("b", [output_dim], initializer=tf.constant_initializer(0.0)) 56 | return tf.matmul(x,w)+b 57 | 58 | def ReLU(x,output_dim, scope): 59 | # helper function for implementing stacked ReLU layers 60 | with tf.variable_scope(scope): 61 | return tf.nn.relu(linear(x,output_dim)) 62 | 63 | def encode(x,share=None): 64 | with tf.variable_scope("encoder",reuse=share): 65 | for l in range(3): 66 | x=ReLU(x,150,"aggregate_loss"+str(l)) 67 | return linear(x,2*z_dim) 68 | 69 | def KLGaussian(Q,N): 70 | # Q, N are instances of NormalDistribution 71 | # implements KL Divergence term KL(N0,N1) derived in Appendix A.1 72 | # Q ~ Normal(mu,A*sigma*A^T), N ~ Normal(mu,sigma_1) 73 | # returns scalar divergence, measured in nats (information units under log rather than log2), shape= batch x 1 74 | sum=lambda x: tf.reduce_sum(x,1) # convenience fn for summing over features (columns) 75 | k=float(Q.mu.get_shape()[1].value) # dimension of distribution 76 | mu0,v,r,mu1=Q.mu,Q.v,Q.r,N.mu 77 | s02,s12=tf.square(Q.sigma),tf.square(N.sigma)+eps 78 | #vr=sum(v*r) 79 | a=sum(s02*(1.+2.*v*r)/s12) + sum(tf.square(v)/s12)*sum(tf.square(r)*s02) # trace term 80 | b=sum(tf.square(mu1-mu0)/s12) # difference-of-means term 81 | c=2.*(sum(N.logsigma-Q.logsigma) - tf.log(1.+sum(v*r))) # ratio-of-determinants term. 
82 | return 0.5*(a+b-k+c)#, a, b, c 83 | 84 | def sampleNormal(mu,sigma): 85 | # diagonal stdev 86 | n01=tf.random_normal(sigma.get_shape(), mean=0, stddev=1) 87 | return mu+sigma*n01 88 | 89 | def sampleQ_phi(h_enc,share=None): 90 | with tf.variable_scope("sampleQ_phi",reuse=share): 91 | mu,log_sigma=tf.split(1,2,linear(h_enc,z_dim*2)) # diagonal stdev values 92 | sigma=tf.exp(log_sigma) 93 | return sampleNormal(mu,sigma), NormalDistribution(mu, sigma, log_sigma) 94 | 95 | def transition(h,share=None): 96 | # compute A,B,o linearization matrices 97 | with tf.variable_scope("trans",reuse=share): 98 | for l in range(2): 99 | h=ReLU(h,100,"l"+str(l)) 100 | with tf.variable_scope("A"): 101 | v,r=tf.split(1,2,linear(h,z_dim*2)) 102 | v1=tf.expand_dims(v,-1) # (batch, z_dim, 1) 103 | rT=tf.expand_dims(r,1) # batch, 1, z_dim 104 | I=tf.diag([1.]*z_dim) 105 | A=(I+tf.batch_matmul(v1,rT)) # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted) 106 | with tf.variable_scope("B"): 107 | B=linear(h,z_dim*u_dim) 108 | B=tf.reshape(B,[-1,z_dim,u_dim]) 109 | with tf.variable_scope("o"): 110 | o=linear(h,z_dim) 111 | return A,B,o,v,r 112 | 113 | def sampleQ_psi(z,u,Q_phi,share=None): 114 | A,B,o,v,r=transition(z,share) 115 | with tf.variable_scope("sampleQ_psi"): 116 | mu_t=tf.expand_dims(Q_phi.mu,-1) # batch,z_dim,1 117 | Amu=tf.squeeze(tf.batch_matmul(A,mu_t), [-1]) 118 | u=tf.expand_dims(u,-1) # batch,u_dim,1 119 | Bu=tf.squeeze(tf.batch_matmul(B,u),[-1]) 120 | Q_psi=NormalDistribution(Amu+Bu+o,Q_phi.sigma,Q_phi.logsigma, v, r) 121 | # the actual z_next sample is generated by deterministically transforming z_t 122 | z=tf.expand_dims(z,-1) 123 | Az=tf.squeeze(tf.batch_matmul(A,z),[-1]) 124 | z_next=Az+Bu+o 125 | return z_next,Q_psi#,(A,B,o,v,r) # debugging 126 | 127 | def decode(z,share=None): 128 | with tf.variable_scope("decoder",reuse=share): 129 | for l in range(2): 130 | z=ReLU(z,200,"l"+str(l)) 131 | return linear(z,x_dim) 132 | 133 | def binary_crossentropy(t,o): 134 | return t*tf.log(o+eps) + (1.0-t)*tf.log(1.0-o+eps) 135 | 136 | def recons_loss(x,x_recons): 137 | with tf.variable_scope("Lx"): 138 | ll=tf.reduce_sum(binary_crossentropy(x,x_recons),1) # sum across features 139 | return -ll # negative log-likelihood 140 | 141 | def latent_loss(Q): 142 | with tf.variable_scope("Lz"): 143 | mu2=tf.square(Q.mu) 144 | sigma2=tf.square(Q.sigma) 145 | # KL(Q || N(0,I)) between the posterior and the unit Gaussian prior 146 | return -0.5*tf.reduce_sum(1+2*Q.logsigma-mu2-sigma2,1) 147 | 148 | def sampleP_theta(h_dec,share=None): 149 | # sample x from bernoulli distribution with means p=W(h_dec) 150 | with tf.variable_scope("P_theta",reuse=share): 151 | p=linear(h_dec,x_dim) 152 | return tf.sigmoid(p) # mean of bernoulli distribution 153 | 154 | # BUILD NETWORK 155 | batch_size=64 156 | 157 | 158 | # modified 159 | T=10 # sequence len 160 | 161 | # inputs 162 | xs=[tf.placeholder(tf.float32,[batch_size, x_dim]) for _ in range(T)] # one placeholder per step; [placeholder]*T would alias a single tensor 163 | us=[tf.placeholder(tf.float32, [batch_size, u_dim]) for _ in range(T-1)] 164 | 165 | x_recons=[0]*(T-1) # we'll apply VAE recons to the last one in the sequence as well 166 | x_predicts=[0]*(T-1) 167 | 168 | # T-1 recons and prediction losses 169 | losses=[0]*(T-1) 170 | kl_losses=[0]*(T-1) 171 | lbound_losses=[0]*(T-1) 172 | print(T) 173 | # arrays for storing losses 174 | DO_SHARE=None 175 | for t in range(T-1): 176 | if t==0: 177 | x=xs[0] # we are given the first one 178 | else: 179 | x=x_predicts[t-1] # re-use our hallucinations to predict 180 | #x=xs[t] 181 | 
h_enc=encode(x,share=DO_SHARE) 182 | z,Q_phi=sampleQ_phi(h_enc,share=DO_SHARE) 183 | # reconstitute x_t 184 | h_dec=decode(z,share=DO_SHARE) 185 | x_recons[t]=sampleP_theta(h_dec,share=DO_SHARE) 186 | # linearized dynamics, predict 187 | # compute linearized dynamics, predict new latent state 188 | z_predict,Q_psi=sampleQ_psi(z,us[t],Q_phi,share=DO_SHARE) 189 | DO_SHARE=True # share from now on 190 | h_dec_predict=decode(z_predict,share=DO_SHARE) 191 | x_predicts[t]=sampleP_theta(h_dec_predict,share=DO_SHARE) 192 | # encode next true data 193 | h_enc_next=encode(xs[t+1],share=DO_SHARE) 194 | z_next,Q_phi_next=sampleQ_phi(h_enc_next,share=DO_SHARE) 195 | 196 | # add loss terms 197 | L_x=recons_loss(xs[t],x_recons[t]) 198 | L_x_next=recons_loss(xs[t+1],x_predicts[t]) 199 | L_z=latent_loss(Q_phi) 200 | L_bound=L_x+L_x_next+L_z 201 | KL=KLGaussian(Q_psi,Q_phi_next) 202 | lambd=1.0 203 | 204 | lbound_losses[t]=tf.reduce_mean(L_bound) 205 | kl_losses[t]=tf.reduce_mean(lambd*KL) 206 | losses[t]=tf.reduce_mean(L_bound+lambd*KL) # average loss over minibatch to single scalar 207 | 208 | 209 | loss=tf.add_n(losses) # sum all the losses to compute the sequence loss 210 | 211 | for v in tf.all_variables(): 212 | print("%s : %s" % (v.name, v.get_shape())) 213 | 214 | with tf.variable_scope("Optimizer"): 215 | learning_rate=1e-4 216 | optimizer=tf.train.AdamOptimizer(learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1 217 | train_op=optimizer.minimize(loss) 218 | 219 | saver = tf.train.Saver(max_to_keep=100) # keep up to 100 checkpoint files 220 | 221 | ckpt_file="/ltmp/e2c-plane" 222 | 223 | # summaries 224 | tf.scalar_summary("loss", loss) 225 | 226 | # per-step loss summaries, in case late-step KL terms (e.g. Lz_8) behave very differently from Lz_0 227 | #for t in range(T-1): 228 | # tf.scalar_summary("Lb_%d" % (t), lbound_losses[t]) 229 | # tf.scalar_summary("Lz_%d" % (t), kl_losses[t]) 230 | # tf.scalar_summary("loss_%d" % (t), losses[t]) 231 | 232 | # tf.scalar_summary("L_x", tf.reduce_mean(L_x)) 233 | # tf.scalar_summary("L_x_next", tf.reduce_mean(L_x_next)) 234 | # tf.scalar_summary("L_z", tf.reduce_mean(L_z)) 235 | # tf.scalar_summary("KL",tf.reduce_mean(KL)) 236 | all_summaries = tf.merge_all_summaries() 237 | 238 | # TRAIN 239 | if __name__=="__main__": 240 | init=tf.initialize_all_variables() 241 | sess=tf.InteractiveSession() 242 | sess.run(init) 243 | # WRITER 244 | writer = tf.train.SummaryWriter("/ltmp/e2c", sess.graph_def) 245 | 246 | dataset=PlaneData("data/plane1.npz","data/env1.png") 247 | dataset.initialize() 248 | 249 | # resume training 250 | saver.restore(sess, "/ltmp/e2c-plane-single1.ckpt") 251 | 252 | # test to make sure samples are actually trajectories 253 | 254 | def getimgs(x): 255 | padsize=1 256 | padval=.5 257 | ph=B+2*padsize 258 | pw=A+2*padsize 259 | img=np.ones((ph,len(x)*pw))*padval 260 | for t in range(len(x)): 261 | startc=t*pw+padsize 262 | img[padsize:padsize+B, startc:startc+A]=x[t][20,:].reshape((A,B)) 263 | return img 264 | (x_vals,u_vals)=dataset.sample_seq(batch_size,T) 265 | plt.matshow(getimgs(x_vals),cmap=plt.cm.gray,vmin=0,vmax=1) 266 | plt.show() 267 | 268 | train_iters=2e5 # 200K iters 269 | for i in range(int(train_iters)): 270 | (x_vals,u_vals)=dataset.sample_seq(batch_size,T,replace=False) 271 | feed_dict={} 272 | for t in range(T): 273 | feed_dict[xs[t]] = x_vals[t] 274 | for t in range(T-1): 275 | feed_dict[us[t]] = u_vals[t] 276 | 277 | results=sess.run([loss,all_summaries,train_op],feed_dict) 278 | if i%1000==0: 279 | print("iter=%d : Loss: %f" % (i,results[0])) 280 | if 
i>2000: 281 | writer.add_summary(results[1], i) 282 | if (i%100==0 and i < 1000) or (i % 1000 == 0): 283 | saver.save(sess,ckpt_file+"-%05d"%(i)+".ckpt") 284 | 285 | # save variables 286 | #print("Model saved in file: %s" % saver.save(sess,ckpt_file+".ckpt")) 287 | 288 | sess.close() 289 | -------------------------------------------------------------------------------- /e2c/tf_e2c/env0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/env0.png -------------------------------------------------------------------------------- /e2c/tf_e2c/env1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/env1.png -------------------------------------------------------------------------------- /e2c/tf_e2c/env_blank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethanluoyc/e2c-pytorch/a1c9be82a071f458d2a9bbc4395eead4941efbc6/e2c/tf_e2c/env_blank.png -------------------------------------------------------------------------------- /e2c/tf_e2c/plane_data2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | 5 | import numpy as np 6 | from numpy.random import randint 7 | import os 8 | from .dataset import DataSet 9 | 10 | num_t = 80 # number of trajectories (i.e. number of initial states) 11 | T = 1000 # length of each trajectory sequence 12 | u_dim = 2 # control (action) dimension 13 | w, h = 40, 40 14 | x_dim = w * h 15 | rw = 1 # robot half-width 16 | 17 | 18 | def get_params(): 19 | return x_dim, u_dim, T 20 | 21 | 22 | class PlaneData(DataSet): 23 | def __init__(self, fname, env_file): 24 | super(PlaneData, self).__init__() 25 | self.cache = fname 26 | self.initialized = False 27 | self.im = plt.imread(os.path.join(os.path.dirname(__file__), env_file)) # grayscale 28 | self.params = (x_dim, u_dim, T) 29 | 30 | def is_colliding(self, p): 31 | if np.any([p - rw < 0, p + rw >= w]): 32 | return True 33 | # check robot body overlap with obstacle field 34 | return np.mean( 35 | self.im[p[0] - rw:p[0] + rw + 1, p[1] - rw:p[1] + rw + 1]) > 0.05 36 | 37 | def compute_traj(self, max_dist=1): 38 | # computes P,U data for single trajectory 39 | # all P,U share the same environment obstacles.png 40 | P = np.zeros((T, 2), dtype=np.int) # r,c position 41 | U = np.zeros((T, u_dim), dtype=np.int) 42 | P[0, :] = [rw, randint(rw, w - rw)] # initial location 43 | for t in range(1, T): 44 | p = np.copy(P[t - 1, :]) 45 | # dr direction 46 | d = randint(-1, 2) # direction 47 | nsteps = randint(max_dist + 1) 48 | dr = d * nsteps # applied control 49 | for i in range(nsteps): 50 | p[0] += d 51 | if self.is_colliding(p): 52 | p[0] -= d 53 | break 54 | # dc direction 55 | d = randint(-1, 2) # direction 56 | nsteps = randint(max_dist + 1) 57 | dc = d * nsteps # applied control 58 | for i in range(nsteps): 59 | p[1] += d 60 | if self.is_colliding(p): 61 | p[1] -= d # step back 62 | break 63 | P[t, :] = p 64 | U[t, :] = [dr, dc] 65 | return P, U 66 | 67 | def initialize(self): 68 | if os.path.exists(self.cache): 69 | self.load() 70 | else: 71 | self.precompute() 72 | self.initialized = True 73 | 74 | def compute_data(self): 75 | # compute multiple trajectories 76 | P = 
np.zeros((num_t, T, 2), dtype=np.int) 77 | U = np.zeros((num_t, T, u_dim), dtype=np.int) 78 | for i in range(num_t): 79 | P[i, :, :], U[i, :, :] = self.compute_traj(max_dist=2) 80 | return P, U 81 | 82 | def precompute(self): 83 | print("Precomputing P,U...") 84 | self.P, self.U = self.compute_data() 85 | 86 | def save(self): 87 | print("Saving P,U...") 88 | np.savez(self.cache, P=self.P, U=self.U) 89 | 90 | def load(self): 91 | print("Loading P,U from %s..." % (self.cache)) 92 | D = np.load(self.cache) 93 | self.P, self.U = D['P'], D['U'] 94 | 95 | def getXp(self, p): 96 | # return image X given true state p (position) of robot 97 | x = np.copy(self.im) 98 | x[p[0] - rw:p[0] + rw + 1, p[1] - rw: 99 | p[1] + rw + 1] = 1. # robot is white on black background 100 | return x.flatten() # 1-D copy of the rendered frame 101 | 102 | def getX(self, i, t): 103 | # i=trajectory index, t=time step 104 | return self.getXp(self.P[i, t, :]) 105 | 106 | def getXTraj(self, i): 107 | # i=traj index 108 | X = np.zeros((T, x_dim), dtype=np.float) 109 | for t in range(T): 110 | X[t, :] = self.getX(i, t) 111 | return X 112 | 113 | def sample(self, batch_size, replace=True): # replace is accepted for compatibility with the training scripts; (traj, t) indices are drawn independently either way 114 | """ 115 | computes (x_t,u_t,x_{t+1}) pair 116 | returns tuple of 3 ndarrays with shape 117 | (batch,x_dim), (batch, u_dim), (batch, x_dim) 118 | """ 119 | if not self.initialized: 120 | raise ValueError( 121 | "Dataset not loaded - call PlaneData.initialize() first.") 122 | traj = randint(0, num_t, size=batch_size) # which trajectory 123 | tt = randint(0, T - 1, size=batch_size) # time step t for each batch 124 | X0 = np.zeros((batch_size, x_dim)) 125 | U0 = np.zeros((batch_size, u_dim), dtype=np.int) 126 | X1 = np.zeros((batch_size, x_dim)) 127 | for i in range(batch_size): 128 | t = tt[i] 129 | p = self.P[traj[i], t, :] 130 | X0[i, :] = self.getX(traj[i], t) 131 | X1[i, :] = self.getX(traj[i], t + 1) 132 | U0[i, :] = self.U[traj[i], t, :] 133 | return (X0, U0, X1) 134 | 135 | def getPSpace(self): 136 | """ 137 | Returns all possible positions of agent 138 | """ 139 | ww = h - 2 * rw 140 | P = np.zeros((ww * ww, 2)) # max possible positions 141 | i = 0 142 | p = np.array([rw, rw]) # initial location 143 | for dr in range(ww): 144 | for dc in range(ww): 145 | if not self.is_colliding(p + np.array([dr, dc])): 146 | P[i, :] = p + np.array([dr, dc]) 147 | i += 1 148 | return P[:i, :] 149 | 150 | def getXPs(self, Ps): 151 | X = np.zeros((Ps.shape[0], x_dim)) 152 | for i in range(Ps.shape[0]): 153 | X[i, :] = self.getXp(Ps[i, :]) 154 | return X 155 | 156 | 157 | if __name__ == "__main__": 158 | import matplotlib.animation as animation 159 | p = PlaneData("plane2.npz", "env1.png") 160 | p.initialize() 161 | p.save() 162 | im = p.im 163 | A, B = im.shape 164 | 165 | # show sample tuples 166 | if True: 167 | fig, aa = plt.subplots(1, 2) 168 | x0, u0, x1 = p.sample(2) 169 | m1 = aa[0].matshow( 170 | x0[0, :].reshape(w, w), cmap=plt.cm.gray, vmin=0., vmax=1.) 171 | aa[0].set_title('x(t)') 172 | m2 = aa[1].matshow( 173 | x1[0, :].reshape(w, w), cmap=plt.cm.gray, vmin=0., vmax=1.) 
174 | aa[1].set_title('x(t+1), u=(%d,%d)' % (u0[0, 0], u0[0, 1])) 175 | fig.tight_layout() 176 | 177 | def updatemat2(t): 178 | x0, u0, x1 = p.sample(2) 179 | m1.set_data(x0[0, :].reshape(w, w)) 180 | m2.set_data(x1[0, :].reshape(w, w)) 181 | return m1, m2 182 | 183 | anim = animation.FuncAnimation( 184 | fig, updatemat2, frames=100, interval=1000, blit=True, repeat=True) 185 | 186 | Writer = animation.writers['imagemagick'] # animation.writers.avail 187 | writer = Writer(fps=1, metadata=dict(artist='Me'), bitrate=1800) 188 | anim.save('sample_obs.gif', writer=writer) 189 | 190 | #show trajectory 191 | if True: 192 | fig, ax = plt.subplots() 193 | X = p.getXTraj(0) 194 | mat = ax.matshow( 195 | X[0, :].reshape((A, B)), cmap=plt.cm.gray, vmin=0., vmax=1.) 196 | 197 | def updatemat(t): 198 | mat.set_data(X[t, :].reshape((A, B))) 199 | return mat, 200 | 201 | anim = animation.FuncAnimation( 202 | fig, updatemat, frames=T - 1, interval=30, blit=True, repeat=True) 203 | plt.show() 204 | -------------------------------------------------------------------------------- /e2c/tf_e2c/tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | test functions like KLGaussian to make sure you implemented correctly 3 | """ 4 | 5 | import numpy as np 6 | 7 | # ground truth implementation 8 | from divergence import gau_kl 9 | 10 | pm = np.array([1., 1., 1.], dtype=np.float32) # true 11 | pv = np.array([0.1, 0.3, 0.5], dtype=np.float32) # diagonal covariance 12 | qm = np.array([0., 0., 0.], dtype=np.float32) 13 | qv = np.array([1., 1., 1.], dtype=np.float32) 14 | 15 | KL, a, b, c = gau_kl(pm, pv, qm, qv) # assumes diagonal covariances... 16 | print('KL : %f' % (KL)) 17 | print('trace term : %f' % (a)) 18 | print('difference of means : %f' % (b)) 19 | print('ratio of determinants : %f' % (c)) 20 | 21 | # my implementation 22 | 23 | import tensorflow as tf 24 | from e2c import NormalDistribution, KLGaussian 25 | batch_size = 1 26 | z_dim = 3 27 | 28 | I = tf.identity(np.tile(np.eye(z_dim, dtype=np.float32), 29 | [batch_size, 1, 30 | 1])) # identity matrix (batch_size, z_dim, z_dim) 31 | zero_z = tf.constant(0., shape=[batch_size, z_dim]) 32 | 33 | pm = pm.reshape((batch_size, z_dim)) 34 | pv = pv.reshape((batch_size, z_dim)) 35 | qm = qm.reshape((batch_size, z_dim)) 36 | qv = qv.reshape((batch_size, z_dim)) 37 | 38 | pmu = tf.constant(pm, shape=[batch_size, z_dim]) 39 | psigma = tf.constant(np.sqrt(pv)) 40 | P = NormalDistribution(pmu, psigma, tf.log(psigma), zero_z, zero_z) 41 | 42 | qmu = tf.constant(qm, shape=[batch_size, z_dim]) 43 | qsigma = tf.constant(np.sqrt(qv)) 44 | Q = NormalDistribution(qmu, qsigma, tf.log(qsigma), zero_z, zero_z) 45 | 46 | # sigma0=tf.constant([0.1,0.3,0.5]) 47 | # P=NormalDistribution(one_z, sigma0, tf.log(sigma0), zero_z, zero_z) 48 | # Pz=NormalDistribution(zero_z, one_z, zero_z, zero_z, zero_z)# prior on Q_phi = mean 0, unit variance => logsigma=0 49 | 50 | KL, a, b, c = KLGaussian(P, Q, "tmp") 51 | 52 | sess = tf.InteractiveSession() 53 | 54 | results = sess.run([KL, a, b, c]) 55 | print('KL : %f' % (results[0])) 56 | print('trace term : %f' % (results[1])) 57 | print('difference of means : %f' % (results[2])) 58 | print('ratio of determinants : %f' % (results[3])) 59 | 60 | sess.close() 61 | -------------------------------------------------------------------------------- /e2c/tf_e2c/vae.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | import numpy as np 6 | import 
tensorflow as tf 7 | import matplotlib.pyplot as plt 8 | import os 9 | from tensorflow.examples.tutorials import mnist 10 | 11 | A = B = 40 12 | x_dim = A * B 13 | z_dim = 2 14 | 15 | eps = 1e-9 # numerical stability 16 | 17 | 18 | def orthogonal_initializer(scale=1.1): 19 | ''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120 20 | ''' 21 | print( 22 | 'Warning -- You have opted to use the orthogonal_initializer function') 23 | 24 | def _initializer(shape, dtype=tf.float32): 25 | flat_shape = (shape[0], np.prod(shape[1:])) 26 | a = np.random.normal(0.0, 1.0, flat_shape) 27 | u, _, v = np.linalg.svd(a, full_matrices=False) 28 | # pick the one with the correct shape 29 | q = u if u.shape == flat_shape else v 30 | q = q.reshape(shape) # tf.constant below casts the result to float32 31 | print('you have initialized one orthogonal matrix.') 32 | return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32) 33 | 34 | return _initializer 35 | 36 | 37 | class NormalDistribution(object): 38 | """Diagonal Gaussian N(mu, diag(sigma^2)), stored as (mu, sigma, logsigma)""" 39 | 40 | def __init__(self, mu, sigma, logsigma): 41 | super(NormalDistribution, self).__init__() 42 | self.mu = mu 43 | self.sigma = sigma 44 | self.logsigma = logsigma 45 | 46 | 47 | def linear(x, output_dim): 48 | #w=tf.get_variable("w", [x.get_shape()[1], output_dim], initializer=tf.random_normal_initializer(mean=0.0, stddev=.01)) 49 | w = tf.get_variable( 50 | "w", [x.get_shape()[1], output_dim], 51 | initializer=orthogonal_initializer(1.1)) 52 | b = tf.get_variable( 53 | "b", [output_dim], initializer=tf.constant_initializer(0.0)) 54 | return tf.matmul(x, w) + b 55 | 56 | 57 | def ReLU(x, output_dim, scope): 58 | with tf.variable_scope(scope): 59 | return tf.nn.relu(linear(x, output_dim)) 60 | 61 | 62 | def encode(x): 63 | with tf.variable_scope("encoder"): 64 | for l in range(3): 65 | x = ReLU(x, 150, "l" + str(l)) 66 | return linear(x, 2 * z_dim) # mu and log_sigma, z_dim values each 67 | #return tf.nn.relu(linear(x,z_dim)) 68 | 69 | 70 | def sampleNormal(mu, sigma): 71 | # note: sigma is diagonal standard deviation, not variance 72 | n01 = tf.random_normal(mu.get_shape(), mean=0, stddev=1) 73 | return mu + sigma * n01 74 | 75 | 76 | def sampleQ(h_enc): 77 | """ 78 | Samples Zt ~ normrnd(mu,sigma) via reparameterization trick for normal dist 79 | mu is (batch,z_size) 80 | 81 | """ 82 | with tf.variable_scope("sampleQ"): 83 | with tf.variable_scope("Q"): 84 | mu, log_sigma = tf.split(1, 2, linear(h_enc, z_dim * 2)) 85 | sigma = tf.exp(log_sigma) # sigma_t, diagonal stdev of Q_phi 86 | return sampleNormal(mu, sigma), NormalDistribution( 87 | mu, sigma, log_sigma) 88 | 89 | 90 | def decode(z): 91 | # with tf.variable_scope("decoder"): 92 | # return tf.nn.relu(linear(z,x_dim)) 93 | with tf.variable_scope("decoder"): 94 | for l in range(2): 95 | z = ReLU(z, 200, "l" + str(l)) 96 | return linear(z, x_dim) 97 | 98 | 99 | def binary_crossentropy(t, o): 100 | return -(t * tf.log(o + eps) + (1.0 - t) * tf.log(1.0 - o + eps)) 101 | 102 | 103 | def recons_loss(x, x_recons): 104 | with tf.variable_scope("Lx"): 105 | return tf.reduce_sum(binary_crossentropy(x, x_recons), 106 | 1) # sum across features 107 | 108 | 109 | def latent_loss(Q): 110 | # KL divergence between the latent-space posterior and a prior 111 | # (regularizer) 112 | with tf.variable_scope("Lz"): 113 | mu2 = tf.square(Q.mu) 114 | sigma2 = tf.square(Q.sigma) 115 | #return 0.5*tf.reduce_sum(1.+mu2+sigma2-2.*Q.logsigma,1) # sum across features 116 | # KL(Q || N(0,I)) for a diagonal Gaussian 117 | 
return -0.5 * tf.reduce_sum(1 + 2 * Q.logsigma - mu2 - sigma2, 1) 118 | 119 | 120 | def sampleP_theta(h_dec): 121 | # sample x from bernoulli distribution with means p=W(h_dec) 122 | with tf.variable_scope("P_theta"): 123 | p = linear(h_dec, x_dim) 124 | return tf.sigmoid(p) # mean of bernoulli distribution 125 | 126 | 127 | # BUILD NETWORK 128 | batch_size = 64 129 | x = tf.placeholder(tf.float32, [batch_size, x_dim]) 130 | h_enc = encode(x) # encoded space 131 | z, Q = sampleQ(h_enc) # z - latent space 132 | #h_dec=decode(h_enc) # regular autoencoder 133 | h_dec = decode(z) # decoded space 134 | x_recons = sampleP_theta(h_dec) # original space 135 | 136 | with tf.variable_scope("Loss"): 137 | L_x = recons_loss(x, x_recons) 138 | L_z = latent_loss(Q) 139 | loss = tf.reduce_mean(L_x) 140 | #loss=tf.reduce_mean(L_x+L_z) # average over minibatch -> single scalar 141 | 142 | with tf.variable_scope("Optimizer"): 143 | learning_rate = 1e-4 144 | optimizer = tf.train.AdamOptimizer( 145 | learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1 146 | train_op = optimizer.minimize(loss) 147 | 148 | saver = tf.train.Saver() # saves variables learned during training 149 | 150 | # summaries 151 | tf.scalar_summary("loss", loss) 152 | tf.scalar_summary("L_x", tf.reduce_mean(L_x)) 153 | tf.scalar_summary("L_z", tf.reduce_mean(L_z)) 154 | all_summaries = tf.merge_all_summaries() 155 | 156 | # TRAIN 157 | init = tf.initialize_all_variables() 158 | sess = tf.InteractiveSession() 159 | sess.run(init) 160 | # WRITER 161 | writer = tf.train.SummaryWriter("/ltmp/vae", sess.graph_def) 162 | 163 | # PLANE TASK 164 | ckpt_file = "vaemodel_plane.ckpt" 165 | from plane_data2 import PlaneData 166 | dataset = PlaneData("plane.npz", "env0.png") 167 | dataset.initialize() 168 | 169 | # resume training 170 | #saver.restore(sess, ckpt_file) 171 | 172 | # # TRAIN 173 | if True: 174 | train_iters = 50000 175 | for i in range(int(train_iters)): 176 | (x_val, u_val, x_next_val) = dataset.sample(batch_size) 177 | #x_val=dataset.sample(batch_size) 178 | feed_dict = {x: x_val} 179 | results = sess.run([loss, all_summaries, train_op], feed_dict) 180 | writer.add_summary(results[1], i) # write summary data to disk 181 | if i % 1000 == 0: 182 | print("iter=%d : Loss: %f" % (i, results[0])) 183 | # save variables 184 | print("Model saved in file: %s" % saver.save(sess, ckpt_file)) 185 | 186 | if True: 187 | saver.restore(sess, ckpt_file) 188 | (x_val, u_val, x_next_val) = dataset.sample(batch_size) 189 | #x_val=dataset.sample(batch_size) 190 | xr = sess.run(x_recons, {x: x_val}) 191 | fig, arr = plt.subplots(10, 2) 192 | for i in range(10): 193 | arr[i, 0].matshow( 194 | x_val[i, :].reshape((A, B)), cmap=plt.cm.gray, vmin=0, vmax=1) 195 | arr[i, 1].matshow( 196 | xr[i, :].reshape((A, B)), cmap=plt.cm.gray, vmin=0, vmax=1) 197 | plt.show() 198 | 199 | sess.close() 200 | -------------------------------------------------------------------------------- /e2c/tf_e2c/viz_results.py: -------------------------------------------------------------------------------- 1 | """ 2 | Quick-and-dirty visualization scripts for a variety of tasks. 
3 | """ 4 | 5 | import sequential_e2c as e2c 6 | #import e2c_plane_z as e2c 7 | 8 | import matplotlib.pyplot as plt 9 | import tensorflow as tf 10 | import numpy as np 11 | from data.plane_data2 import PlaneData, get_params 12 | 13 | import ipdb as pdb 14 | 15 | 16 | def show_recons_samples(sess, ckptfile): 17 | # visualize sample reconstructions 18 | e2c.saver.restore(sess, ckptfile) # restore variable values 19 | dataset = PlaneData("data/plane1.npz", "data/env1.png") 20 | dataset.initialize() 21 | (x_val, u_val, x_next_val) = dataset.sample(e2c.batch_size) 22 | xr, xp = sess.run( 23 | [e2c.x_recons, e2c.x_predict], 24 | feed_dict={e2c.x: x_val, 25 | e2c.u: u_val, 26 | e2c.x_next: x_next_val}) 27 | #xr,xp=sess.run([e2c.x_recons0, e2c.x_predict0],feed_dict={e2c.x0:x_val,e2c.u0:u_val,e2c.x1:x_next_val}) 28 | A, B = e2c.A, e2c.B 29 | 30 | def getimgs(x, xnext): 31 | padsize = 1 32 | padval = .5 33 | ph = B + 2 * padsize 34 | pw = A + 2 * padsize 35 | img = np.ones((10 * ph, 2 * pw)) * padval 36 | for i in range(10): 37 | startr = i * ph + padsize 38 | img[startr:startr + B, padsize:padsize + A] = x[i, :].reshape((A, 39 | B)) 40 | for i in range(10): 41 | startr = i * ph + padsize 42 | img[startr:startr + B, pw + padsize:pw + padsize + A] = xnext[ 43 | i, :].reshape((A, B)) 44 | return img 45 | 46 | fig, arr = plt.subplots(1, 2) 47 | arr[0].matshow( 48 | getimgs(x_val, x_next_val), cmap=plt.cm.gray, vmin=0, vmax=1) 49 | arr[0].set_title('Data') 50 | arr[1].matshow(getimgs(xr, xp), cmap=plt.cm.gray, vmin=0, vmax=1) 51 | arr[1].set_title('Reconstruction') 52 | plt.show() 53 | 54 | 55 | def show_recons_seq(sess, ckptfile): 56 | e2c.saver.restore(sess, ckptfile) # restore variable values 57 | dataset = PlaneData("data/plane1.npz", "data/env1.png") 58 | dataset.initialize() 59 | T = e2c.T 60 | print(T) 61 | (x_vals, u_vals) = dataset.sample_seq(e2c.batch_size, T) 62 | feed_dict = {} 63 | for t in range(T): 64 | feed_dict[e2c.xs[t]] = x_vals[t] 65 | for t in range(T - 1): 66 | feed_dict[e2c.us[t]] = u_vals[t] 67 | 68 | fetches = e2c.x_recons + e2c.x_predicts 69 | results = sess.run(fetches, feed_dict) 70 | xr = results[:T - 1] 71 | xp = results[T - 1:] 72 | A, B = e2c.A, e2c.B 73 | 74 | def getimgs(x): 75 | padsize = 1 76 | padval = .5 77 | ph = B + 2 * padsize 78 | pw = A + 2 * padsize 79 | img = np.ones((ph, len(x) * pw)) * padval 80 | for t in range(len(x)): 81 | startc = t * pw + padsize 82 | img[padsize:padsize + B, startc:startc + A] = x[t][1, :].reshape( 83 | (A, B)) 84 | return img 85 | 86 | fig, arr = plt.subplots(3, 1) 87 | arr[0].matshow(getimgs(x_vals), cmap=plt.cm.gray, vmin=0, vmax=1) 88 | arr[0].set_title('X') 89 | arr[1].matshow(getimgs(xr), cmap=plt.cm.gray, vmin=0, vmax=1) 90 | arr[1].set_title('Reconstruction') 91 | arr[2].matshow(getimgs(xp), cmap=plt.cm.gray, vmin=0, vmax=1) 92 | arr[2].set_title('Prediction') 93 | plt.show() 94 | 95 | 96 | def viz_z(sess, ckptfile): 97 | e2c.saver.restore(sess, ckptfile) # restore variable values 98 | dataset = PlaneData("data/plane1.npz", "data/env1.png") 99 | Ps, NPs = dataset.getPSpace() 100 | batch_size = e2c.batch_size 101 | n0 = NPs.shape[0] 102 | if False: 103 | Ps = np.vstack((Ps, NPs)) 104 | xy = np.zeros([Ps.shape[0], 2]) 105 | xy[:, 0] = Ps[:, 1] 106 | xy[:, 107 | 1] = 20 - Ps[:, 108 | 0] # for the purpose of computing theta, map centered @ origin 109 | Zs = np.zeros([Ps.shape[0], e2c.z_dim]) 110 | 111 | theta = np.arctan(xy[:, 1] / xy[:, 0]) 112 | for i in range(Ps.shape[0] // batch_size): 113 | print("batch %d" % i) 114 | x_val = 
dataset.getXPs(Ps[i * batch_size:(i + 1) * batch_size, :]) 115 | Zs[i * batch_size:(i + 1) * batch_size, :] = sess.run( 116 | e2c.z, {e2c.x: x_val}) 117 | # last remaining points may not fit precisely into 1 minibatch. 118 | x_val = dataset.getXPs(Ps[-batch_size:, :]) 119 | Zs[-batch_size:, :] = sess.run(e2c.z, {e2c.x: x_val}) 120 | 121 | if False: 122 | theta[-n0:] = 1 123 | 124 | fig, arr = plt.subplots(1, 2) 125 | arr[0].scatter(Ps[:, 1], 40 - Ps[:, 0], c=(np.pi + theta) / (2 * np.pi)) 126 | arr[0].set_title('True State Space') 127 | arr[1].scatter(Zs[:, 0], Zs[:, 1], c=(np.pi + theta) / (2 * np.pi)) 128 | arr[1].set_title('Latent Space Z') 129 | #plt.show() 130 | return fig 131 | 132 | 133 | def viz_z_unfold(sess, cpktprefix): 134 | d = 100 # interval 135 | for i in range(int(1e5) // d): 136 | f = "%s-%05d" % (cpktprefix, i * d) 137 | ckptfile = f + ".ckpt" 138 | print(ckptfile) 139 | fig = viz_z(sess, ckptfile) 140 | fig.suptitle('%d' % (i * d)) 141 | fig.savefig(f + ".png") 142 | # combine with convert -delay 10 -loop 0 e2c-plane-*.png out.gif 143 | # then reduce size using gifsicle -O3 --colors 256 < out.gif > new.gif 144 | print('done!') 145 | 146 | 147 | if __name__ == "__main__": 148 | sess = tf.InteractiveSession() 149 | #viz_z_unfold(sess, "/ltmp/e2c-plane") 150 | fig = viz_z(sess, "/ltmp/e2c-plane-199000.ckpt") 151 | #show_recons_samples(sess,"/ltmp/e2c-plane-186000.ckpt") 152 | show_recons_seq(sess, "/ltmp/e2c-plane-199000.ckpt") 153 | plt.show() 154 | sess.close() 155 | -------------------------------------------------------------------------------- /e2c/vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch import nn 4 | from .configs import load_config 5 | from .losses import binary_crossentropy 6 | 7 | 8 | class VAE(torch.nn.Module): 9 | def __init__(self, dim_in, dim_z, config='pendulum'): 10 | super(VAE, self).__init__() 11 | enc, trans, dec = load_config(config) 12 | self.encoder = enc(dim_in, dim_z) 13 | self.decoder = dec(dim_z, dim_in) 14 | 15 | def reparam(self, mean, logvar): 16 | std = logvar.mul(0.5).exp_() 17 | self.z_mean = mean 18 | self.z_sigma = std 19 | eps = torch.FloatTensor(std.size()).normal_() 20 | if std.data.is_cuda: 21 | eps = eps.cuda() # .cuda() returns a copy; the result must be kept 22 | eps = Variable(eps) 23 | return eps.mul(std).add_(mean) 24 | 25 | def forward(self, x): 26 | self.z_mean, logvar = self.encoder(x) 27 | self.z_logsigma = logvar.mul(0.5) 28 | 29 | z = self.reparam(self.z_mean, logvar) 30 | x_dec = self.decoder(z) 31 | 32 | return x_dec 33 | 34 | def latent_embeddings(self, x): 35 | return self.encoder(x)[0] 36 | 37 | 38 | def compute_loss(x_pred, x_true, z_mean, z_logsigma, mse=False): 39 | # see Appendix B from VAE paper: 40 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 41 | # https://arxiv.org/abs/1312.6114 42 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 43 | if mse: 44 | x_reconst_loss = (x_pred - x_true).pow(2).sum(dim=1) 45 | else: 46 | x_reconst_loss = -binary_crossentropy(x_true, x_pred).sum(dim=1) 47 | logvar = z_logsigma.mul(2) 48 | KLD_element = z_mean.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar) 49 | KLD = torch.sum(KLD_element, dim=1).mul(-0.5) 50 | return x_reconst_loss.mean(), KLD.mean() --------------------------------------------------------------------------------
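For orientation, a minimal sketch of how the VAE and compute_loss defined in e2c/vae.py fit together in one training step. The sizes, learning rate, stand-in random batch, and the Adam choice are illustrative assumptions rather than values from this repository; it also assumes e2c.losses.binary_crossentropy follows the usual log-likelihood sign convention, so recon + kld is the negative ELBO.

import torch
from torch.autograd import Variable
from e2c.vae import VAE, compute_loss

dim_in, dim_z = 48 * 48, 3                     # illustrative sizes (hypothetical)
model = VAE(dim_in, dim_z, config='pendulum')  # 'pendulum' is this module's default config
opt = torch.optim.Adam(model.parameters(), lr=1e-4)

x = Variable(torch.rand(128, dim_in))          # stand-in batch of flattened frames in [0, 1]
x_dec = model(x)                               # forward() caches model.z_mean / model.z_logsigma
recon, kld = compute_loss(x_dec, x, model.z_mean, model.z_logsigma)
loss = recon + kld                             # reconstruction term + KL regularizer

opt.zero_grad()
loss.backward()
opt.step()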