├── LICENSE
├── README.md
├── __init__.py
├── dataset.py
├── e2c_plane.py
├── e2c_seq.py
├── env0.png
├── env1.png
├── env_blank.png
├── plane_data2.py
├── tests.py
├── vae.py
└── viz_results.py


/LICENSE:
--------------------------------------------------------------------------------
  1 | Apache License
  2 |                  Version 2.0, January 2004
  3 |               http://www.apache.org/licenses/
  4 | 
  5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 | 1. Definitions.
  8 | 
  9 | "License" shall mean the terms and conditions for use, reproduction,
 10 | and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 | "Licensor" shall mean the copyright owner or entity authorized by
 13 | the copyright owner that is granting the License.
 14 | 
 15 | "Legal Entity" shall mean the union of the acting entity and all
 16 | other entities that control, are controlled by, or are under common
 17 | control with that entity. For the purposes of this definition,
 18 | "control" means (i) the power, direct or indirect, to cause the
 19 | direction or management of such entity, whether by contract or
 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 | outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 | "You" (or "Your") shall mean an individual or Legal Entity
 24 | exercising permissions granted by this License.
 25 | 
 26 | "Source" form shall mean the preferred form for making modifications,
 27 | including but not limited to software source code, documentation
 28 | source, and configuration files.
 29 | 
 30 | "Object" form shall mean any form resulting from mechanical
 31 | transformation or translation of a Source form, including but
 32 | not limited to compiled object code, generated documentation,
 33 | and conversions to other media types.
 34 | 
 35 | "Work" shall mean the work of authorship, whether in Source or
 36 | Object form, made available under the License, as indicated by a
 37 | copyright notice that is included in or attached to the work
 38 | (an example is provided in the Appendix below).
 39 | 
 40 | "Derivative Works" shall mean any work, whether in Source or Object
 41 | form, that is based on (or derived from) the Work and for which the
 42 | editorial revisions, annotations, elaborations, or other modifications
 43 | represent, as a whole, an original work of authorship. For the purposes
 44 | of this License, Derivative Works shall not include works that remain
 45 | separable from, or merely link (or bind by name) to the interfaces of,
 46 | the Work and Derivative Works thereof.
 47 | 
 48 | "Contribution" shall mean any work of authorship, including
 49 | the original version of the Work and any modifications or additions
 50 | to that Work or Derivative Works thereof, that is intentionally
 51 | submitted to Licensor for inclusion in the Work by the copyright owner
 52 | or by an individual or Legal Entity authorized to submit on behalf of
 53 | the copyright owner. For the purposes of this definition, "submitted"
 54 | means any form of electronic, verbal, or written communication sent
 55 | to the Licensor or its representatives, including but not limited to
 56 | communication on electronic mailing lists, source code control systems,
 57 | and issue tracking systems that are managed by, or on behalf of, the
 58 | Licensor for the purpose of discussing and improving the Work, but
 59 | excluding communication that is conspicuously marked or otherwise
 60 | designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 | "Contributor" shall mean Licensor and any individual or Legal Entity
 63 | on behalf of whom a Contribution has been received by Licensor and
 64 | subsequently incorporated within the Work.
 65 | 
 66 | 2. Grant of Copyright License. Subject to the terms and conditions of
 67 | this License, each Contributor hereby grants to You a perpetual,
 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 | copyright license to reproduce, prepare Derivative Works of,
 70 | publicly display, publicly perform, sublicense, and distribute the
 71 | Work and such Derivative Works in Source or Object form.
 72 | 
 73 | 3. Grant of Patent License. Subject to the terms and conditions of
 74 | this License, each Contributor hereby grants to You a perpetual,
 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 | (except as stated in this section) patent license to make, have made,
 77 | use, offer to sell, sell, import, and otherwise transfer the Work,
 78 | where such license applies only to those patent claims licensable
 79 | by such Contributor that are necessarily infringed by their
 80 | Contribution(s) alone or by combination of their Contribution(s)
 81 | with the Work to which such Contribution(s) was submitted. If You
 82 | institute patent litigation against any entity (including a
 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 | or a Contribution incorporated within the Work constitutes direct
 85 | or contributory patent infringement, then any patent licenses
 86 | granted to You under this License for that Work shall terminate
 87 | as of the date such litigation is filed.
 88 | 
 89 | 4. Redistribution. You may reproduce and distribute copies of the
 90 | Work or Derivative Works thereof in any medium, with or without
 91 | modifications, and in Source or Object form, provided that You
 92 | meet the following conditions:
 93 | 
 94 | (a) You must give any other recipients of the Work or
 95 | Derivative Works a copy of this License; and
 96 | 
 97 | (b) You must cause any modified files to carry prominent notices
 98 | stating that You changed the files; and
 99 | 
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 | 
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 | 
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 | 
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 | 
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!)  The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright {yyyy} {name of copyright owner}
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # e2c
 2 | 
 3 | TensorFlow implementation of: [Embed to Control: A Locally Linear Latent Dynamics Model for Control from Raw Images](http://arxiv.org/abs/1506.07365), with code optimized for clarity and simplicity.
 4 | 
 5 | ![latent](http://i.imgur.com/zO5G3K0.png)
 6 | 
 7 | Only 160 lines of code, using only Python modules that come installed with TensorFlow. A proper writeup explaining the paper, plus improved model code, will follow soon.
 8 | 
 9 | ## Results
10 | 
11 | The left column shows x_t and x_{t+1}; the right column shows the corresponding E2C reconstructions.
12 | ![reconstruction](https://1.bp.blogspot.com/-L2qTQr8XZMY/Vv3cgLAklqI/AAAAAAAAE8g/rjMk2Z98XxEalKyXvtZUGeHtArdsD2vBg/s640/figure_1.png)
13 | 
14 | Larger step sizes (magnitude of u) yield better latent space reconstruction...
15 | 
16 | ![unfolding latent space](http://i.imgur.com/DF6Gd96.gif)
17 | 
18 | but degrade image reconstruction fidelity (more on this later...). Here's a different set of obstacles:
19 | 
20 | ![poor reconstruction](http://i.imgur.com/cl9RjlR.png)
21 | 
22 | ## Features:
23 | - Implements the standard E2C model with the factorized Gaussian KL divergence term (Eq. 14)
24 | - Adam Optimizer + Orthogonal weight initialization scheme by [Saxe et al.](http://arxiv.org/abs/1312.6120).
25 | - Learns the latent space of the planar control task (uses the same parameters described in the paper, Appendix B.6.2)
26 | 
27 | ## Training the Model
28 | 
29 | First, generate the synthetic training data `plane2.npz` by running the following script.
30 | 
31 | ```bash
32 | $ python plane_data2.py
33 | ```
34 | 
35 | Then, train the model
36 | ```bash
37 | $ python e2c_plane.py
38 | ```
39 | 
40 | You can then generate visualizations by executing:
41 | 
42 | ```bash
43 | $ python viz_results.py
44 | ```
45 | 
46 | ## Acknowledgements
47 | 
48 | Thanks to Manuel Watter for answering my questions about the paper.
49 | 
50 | ## License
51 | 
52 | Apache 2.0
53 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ericjang/e2c/26e98223e3c6fa2eadfab2fbf80a3801d91fdb2a/__init__.py


--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | # abstract helper class customized for training E2C tasks
2 | 
3 | class DataSet(object):
4 | 	"""Placeholder base class for E2C training datasets; it defines no data or behavior of its own yet."""
5 | 	def __init__(self):
6 | 		super(DataSet, self).__init__()
7 | 


--------------------------------------------------------------------------------
/e2c_plane.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | Implementation of Embed-to-Control model: http://arxiv.org/abs/1506.07365
  5 | Code is organized for simplicity and readability w.r.t paper.
  6 | 
  7 | Author: Eric Jang
  8 | """
  9 | 
 10 | import numpy as np
 11 | import tensorflow as tf
 12 | import matplotlib.pyplot as plt
 13 | import os
 14 | from data.plane_data2 import PlaneData, get_params
 15 | 
 16 | import ipdb as pdb
 17 | # np.random.seed(0)
 18 | tf.set_random_seed(0) # fixed graph-level TensorFlow seed
 19 | 
 20 | A=B=40 # NOTE(review): not referenced by any code visible in this file (transition() binds its own local A and B) -- confirm before removing
 21 | 
 22 | x_dim,u_dim,T=get_params() # x_dim/u_dim set the placeholder widths below; T is not used in the code visible here
 23 | z_dim=2 # latent space dimensionality
 24 | eps=1e-9 # small constant added inside logs and to denominators for numerical stability
 25 | 
 26 | def orthogonal_initializer(scale = 1.1):
 27 |   """
 28 |   From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
 29 |   """
 30 |   def _initializer(shape, dtype=tf.float32):
 31 |     flat_shape = (shape[0], np.prod(shape[1:]))
 32 |     a = np.random.normal(0.0, 1.0, flat_shape)
 33 |     u, _, v = np.linalg.svd(a, full_matrices=False)
 34 |     # pick the one with the correct shape
 35 |     q = u if u.shape == flat_shape else v
 36 |     q = q.reshape(shape)
 37 |     print('Warning -- You have opted to use the orthogonal_initializer function')
 38 |     return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
 39 |   return _initializer
 40 | 
 41 | class NormalDistribution(object):
 42 |   """
 43 |   Represents a multivariate normal distribution parameterized by
 44 |   N(mu,Cov). If cov. matrix is diagonal, Cov=(sigma).^2. Otherwise,
 45 |   Cov=A*(sigma).^2*A', where A = (I+v*r^T).
 46 |   """
 47 |   def __init__(self,mu,sigma,logsigma,v=None,r=None):
 48 |     self.mu=mu
 49 |     self.sigma=sigma # either stdev diagonal itself, or stdev diagonal from decomposition
 50 |     self.logsigma=logsigma
 51 |     dim=mu.get_shape()
 52 |     if v is None:
 53 |       v=tf.constant(0.,shape=dim)
 54 |     if r is None:
 55 |       r=tf.constant(0.,shape=dim)
 56 |     self.v=v
 57 |     self.r=r
 58 | 
 59 | def linear(x,output_dim):
 60 |   w=tf.get_variable("w", [x.get_shape()[1], output_dim], initializer=orthogonal_initializer(1.1))
 61 |   b=tf.get_variable("b", [output_dim], initializer=tf.constant_initializer(0.0))
 62 |   return tf.matmul(x,w)+b
 63 | 
 64 | def ReLU(x,output_dim, scope):
 65 |   # helper function for implementing stacked ReLU layers
 66 |   with tf.variable_scope(scope):
 67 |     return tf.nn.relu(linear(x,output_dim))
 68 | 
 69 | def encode(x,share=None):
 70 |   with tf.variable_scope("encoder",reuse=share):
 71 |     for l in range(3):
 72 |       x=ReLU(x,150,"l"+str(l))
 73 |     return linear(x,2*z_dim)
 74 | 
 75 | def KLGaussian(Q,N):
 76 |   # Q, N are instances of NormalDistribution
 77 |   # implements KL Divergence term KL(N0,N1) derived in Appendix A.1
 78 |   # Q ~ Normal(mu,A*sigma*A^T), N ~ Normal(mu,sigma_1)
 79 |   # returns scalar divergence, measured in nats (information units under log rather than log2), shape= batch x 1
 80 |   sum=lambda x: tf.reduce_sum(x,1) # convenience fn for summing over features (columns)
 81 |   k=float(Q.mu.get_shape()[1].value) # dimension of distribution
 82 |   mu0,v,r,mu1=Q.mu,Q.v,Q.r,N.mu
 83 |   s02,s12=tf.square(Q.sigma),tf.square(N.sigma)+eps
 84 |   #vr=sum(v*r)
 85 |   a=sum(s02*(1.+2.*v*r)/s12) + sum(tf.square(v)/s12)*sum(tf.square(r)*s02) # trace term
 86 |   b=sum(tf.square(mu1-mu0)/s12) # difference-of-means term
 87 |   c=2.*(sum(N.logsigma-Q.logsigma) - tf.log(1.+sum(v*r))) # ratio-of-determinants term. 
 88 |   return 0.5*(a+b-k+c)#, a, b, c
 89 | 
 90 | def sampleNormal(mu,sigma):
 91 |   # diagonal stdev
 92 |   n01=tf.random_normal(sigma.get_shape(), mean=0, stddev=1)
 93 |   return mu+sigma*n01
 94 | 
 95 | def sampleQ_phi(h_enc,share=None):
 96 |   with tf.variable_scope("sampleQ_phi",reuse=share):
 97 |     mu,log_sigma=tf.split(1,2,linear(h_enc,z_dim*2)) # diagonal stdev values
 98 |     sigma=tf.exp(log_sigma)
 99 |     return sampleNormal(mu,sigma), NormalDistribution(mu, sigma, log_sigma)
100 | 
101 | def transition(h):
102 |   # compute A,B,o linearization matrices
103 |   with tf.variable_scope("trans"):
104 |     for l in range(2):
105 |       h=ReLU(h,100,"l"+str(l))
106 |     with tf.variable_scope("A"):
107 |       v,r=tf.split(1,2,linear(h,z_dim*2))
108 |       v1=tf.expand_dims(v,-1) # (batch, z_dim, 1)
109 |       rT=tf.expand_dims(r,1) # batch, 1, z_dim
110 |       I=tf.diag([1.]*z_dim)
111 |       A=(I+tf.batch_matmul(v1,rT)) # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted) 
112 |     with tf.variable_scope("B"):
113 |       B=linear(h,z_dim*u_dim)
114 |       B=tf.reshape(B,[-1,z_dim,u_dim])
115 |     with tf.variable_scope("o"):
116 |       o=linear(h,z_dim)
117 |     return A,B,o,v,r
118 | 
119 | def sampleQ_psi(z,u,Q_phi):
120 |   A,B,o,v,r=transition(z)
121 |   with tf.variable_scope("sampleQ_psi"):
122 |     mu_t=tf.expand_dims(Q_phi.mu,-1) # batch,z_dim,1
123 |     Amu=tf.squeeze(tf.batch_matmul(A,mu_t), [-1])
124 |     u=tf.expand_dims(u,-1) # batch,u_dim,1
125 |     Bu=tf.squeeze(tf.batch_matmul(B,u),[-1])
126 |     Q_psi=NormalDistribution(Amu+Bu+o,Q_phi.sigma,Q_phi.logsigma, v, r)
127 |     # the actual z_next sample is generated by deterministically transforming z_t
128 |     z=tf.expand_dims(z,-1)
129 |     Az=tf.squeeze(tf.batch_matmul(A,z),[-1])
130 |     z_next=Az+Bu+o
131 |     return z_next,Q_psi#,(A,B,o,v,r) # debugging
132 | 
133 | def decode(z,share=None):
134 |   with tf.variable_scope("decoder",reuse=share):
135 |     for l in range(2):
136 |       z=ReLU(z,200,"l"+str(l))
137 |     return linear(z,x_dim)
138 | 
139 | def binary_crossentropy(t,o):
140 |     return t*tf.log(o+eps) + (1.0-t)*tf.log(1.0-o+eps)
141 | 
142 | def recons_loss(x,x_recons):
143 |   with tf.variable_scope("Lx"):
144 |     ll=tf.reduce_sum(binary_crossentropy(x,x_recons),1) # sum across features
145 |     return -ll # negative log-likelihood
146 | 
147 | def latent_loss(Q):
148 |   with tf.variable_scope("Lz"):
149 |     mu2=tf.square(Q.mu)
150 |     sigma2=tf.square(Q.sigma)
151 |     # negative of the upper bound of posterior
152 |     return -0.5*tf.reduce_sum(1+2*Q.logsigma-mu2-sigma2,1)
153 | 
154 | def sampleP_theta(h_dec,share=None):
155 |   # sample x from bernoulli distribution with means p=W(h_dec)
156 |   with tf.variable_scope("P_theta",reuse=share):
157 |     p=linear(h_dec,x_dim)
158 |     return tf.sigmoid(p) # mean of bernoulli distribution
159 | 
160 | # BUILD NETWORK
161 | batch_size=128
162 | 
163 | x=tf.placeholder(tf.float32,[batch_size, x_dim]) # observation at time t
164 | u=tf.placeholder(tf.float32, [batch_size, u_dim]) # control at time t
165 | x_next=tf.placeholder(tf.float32, [batch_size, x_dim]) # observation at time t+1
166 | 
167 | # encode x_t (first call: creates the encoder / sampleQ_phi variables)
168 | h_enc=encode(x)
169 | z,Q_phi=sampleQ_phi(h_enc)
170 | # reconstitute x_t (first call: creates the decoder / P_theta variables)
171 | h_dec=decode(z)
172 | x_recons=sampleP_theta(h_dec)
173 | # compute linearized dynamics, predict new latent state
174 | z_predict,Q_psi=sampleQ_psi(z,u,Q_phi)
175 | # decode prediction, reusing the decoder variables created above
176 | h_dec_predict=decode(z_predict,share=True)
177 | x_predict=sampleP_theta(h_dec_predict,share=True)
178 | # encode next observation, reusing the encoder variables created above
179 | h_enc_next=encode(x_next,share=True)
180 | z_next,Q_phi_next=sampleQ_phi(h_enc_next,share=True)
181 | 
182 | with tf.variable_scope("Loss"):
183 |   L_x=recons_loss(x,x_recons) # reconstruction loss of x_t
184 |   L_x_next=recons_loss(x_next,x_predict) # loss of the decoded prediction against x_{t+1}
185 |   L_z=latent_loss(Q_phi)
186 |   L_bound=L_x+L_x_next+L_z
187 |   KL=KLGaussian(Q_psi,Q_phi_next) # predicted next-state distribution vs. encoding of x_{t+1}
188 |   lambd=0.25 # weight of the KL term in the total loss
189 |   loss=tf.reduce_mean(L_bound+lambd*KL) # average loss over minibatch to single scalar
190 | 
191 | for v in tf.all_variables():
192 |     print("%s : %s" % (v.name, v.get_shape()))
193 | 
194 | pdb.set_trace()
195 | 
196 | with tf.variable_scope("Optimizer"):
197 |   learning_rate=1e-4
198 |   optimizer=tf.train.AdamOptimizer(learning_rate, beta1=0.1, beta2=0.1) # note the non-default beta1 and beta2 (both 0.1)
199 |   train_op=optimizer.minimize(loss)
200 | 
201 | saver = tf.train.Saver(max_to_keep=200) # retains only the 200 most recent checkpoint files
202 | 
203 | ckpt_file="/ltmp/e2c-plane" # checkpoint path prefix; the iteration number and ".ckpt" are appended when saving
204 | 
205 | # summaries: the total loss plus the batch mean of each individual loss term
206 | tf.scalar_summary("loss", loss)
207 | tf.scalar_summary("L_x", tf.reduce_mean(L_x))
208 | tf.scalar_summary("L_x_next", tf.reduce_mean(L_x_next))
209 | tf.scalar_summary("L_z", tf.reduce_mean(L_z))
210 | tf.scalar_summary("KL",tf.reduce_mean(KL))
211 | all_summaries = tf.merge_all_summaries()
212 | 
213 | # TRAIN
214 | if __name__=="__main__":
215 |   init=tf.initialize_all_variables()
216 |   sess=tf.InteractiveSession()
217 |   sess.run(init)
218 |   # WRITER
219 |   writer = tf.train.SummaryWriter("/ltmp/e2c", sess.graph_def)
220 | 
221 |   dataset=PlaneData("data/plane1.npz","data/env1.png")
222 |   dataset.initialize()
223 | 
224 |   # tmp
225 |   # (x_val,u_val,x_next_val)=dataset.sample(batch_size, replace=False)
226 |   # feed_dict={
227 |   #   x:x_val,
228 |   #   u:u_val,
229 |   #   x_next:x_next_val
230 |   # }
231 |   # results=sess.run([L_x,L_x_next,L_z,L_bound,KL],feed_dict)
232 |   # pdb.set_trace()
233 |   # resume training
234 |   #saver.restore(sess, "/ltmp/e2c-plane-83000.ckpt")
235 |   train_iters=2e5 # 5K iters
236 |   for i in range(int(train_iters)):
237 |     (x_val,u_val,x_next_val)=dataset.sample(batch_size, replace=False)
238 |     feed_dict={
239 |       x:x_val,
240 |       u:u_val,
241 |       x_next:x_next_val
242 |     }
243 |     plt.hist(x_val[0,:])
244 |     plt.show()
245 |     results=sess.run([loss,all_summaries,train_op],feed_dict)
246 |     if i%1000==0:
247 |       print("iter=%d : Loss: %f" % (i,results[0]))
248 |       if i>2000:
249 |         writer.add_summary(results[1], i)
250 |     if (i%100==0 and i < 1000) or (i % 1000 == 0):
251 |       saver.save(sess,ckpt_file+"-%05d"%(i)+".ckpt")
252 | 
253 |   sess.close()
254 | 


--------------------------------------------------------------------------------
/e2c_seq.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | multi-step prediction using E2C
  5 | results are kind of poor...
  6 | """
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | import matplotlib.pyplot as plt
 11 | import os
 12 | from data.plane_data2 import PlaneData, get_params
 13 | 
 14 | A=B=40 # NOTE(review): not referenced by any code visible in this file (transition() binds its own local A and B) -- confirm before removing
 15 | 
 16 | x_dim,u_dim,T=get_params() # x_dim/u_dim set the placeholder widths below; T is reassigned before the network is built
 17 | z_dim=2 # latent space dimensionality
 18 | eps=1e-9 # small constant added inside logs and to denominators for numerical stability
 19 | 
 20 | def orthogonal_initializer(scale = 1.1):
 21 |   """
 22 |   From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
 23 |   """
 24 |   def _initializer(shape, dtype=tf.float32):
 25 |     flat_shape = (shape[0], np.prod(shape[1:]))
 26 |     a = np.random.normal(0.0, 1.0, flat_shape)
 27 |     u, _, v = np.linalg.svd(a, full_matrices=False)
 28 |     # pick the one with the correct shape
 29 |     q = u if u.shape == flat_shape else v
 30 |     q = q.reshape(shape)
 31 |     print('Warning -- You have opted to use the orthogonal_initializer function')
 32 |     return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
 33 |   return _initializer
 34 | 
 35 | class NormalDistribution(object):
 36 |   """
 37 |   Represents a multivariate normal distribution parameterized by
 38 |   N(mu,Cov). If cov. matrix is diagonal, Cov=(sigma).^2. Otherwise,
 39 |   Cov=A*(sigma).^2*A', where A = (I+v*r^T).
 40 |   """
 41 |   def __init__(self,mu,sigma,logsigma,v=None,r=None):
 42 |     self.mu=mu
 43 |     self.sigma=sigma # either stdev diagonal itself, or stdev diagonal from decomposition
 44 |     self.logsigma=logsigma
 45 |     dim=mu.get_shape()
 46 |     if v is None:
 47 |       v=tf.constant(0.,shape=dim)
 48 |     if r is None:
 49 |       r=tf.constant(0.,shape=dim)
 50 |     self.v=v
 51 |     self.r=r
 52 | 
 53 | def linear(x,output_dim):
 54 |   w=tf.get_variable("w", [x.get_shape()[1], output_dim], initializer=orthogonal_initializer(1.1))
 55 |   b=tf.get_variable("b", [output_dim], initializer=tf.constant_initializer(0.0))
 56 |   return tf.matmul(x,w)+b
 57 | 
 58 | def ReLU(x,output_dim, scope):
 59 |   # helper function for implementing stacked ReLU layers
 60 |   with tf.variable_scope(scope):
 61 |     return tf.nn.relu(linear(x,output_dim))
 62 | 
 63 | def encode(x,share=None):
 64 |   with tf.variable_scope("encoder",reuse=share):
 65 |     for l in range(3):
 66 |       x=ReLU(x,150,"l"+str(l))
 67 |     return linear(x,2*z_dim)
 68 | 
 69 | def KLGaussian(Q,N):
 70 |   # Q, N are instances of NormalDistribution
 71 |   # implements KL Divergence term KL(N0,N1) derived in Appendix A.1
 72 |   # Q ~ Normal(mu,A*sigma*A^T), N ~ Normal(mu,sigma_1)
 73 |   # returns scalar divergence, measured in nats (information units under log rather than log2), shape= batch x 1
 74 |   sum=lambda x: tf.reduce_sum(x,1) # convenience fn for summing over features (columns)
 75 |   k=float(Q.mu.get_shape()[1].value) # dimension of distribution
 76 |   mu0,v,r,mu1=Q.mu,Q.v,Q.r,N.mu
 77 |   s02,s12=tf.square(Q.sigma),tf.square(N.sigma)+eps
 78 |   #vr=sum(v*r)
 79 |   a=sum(s02*(1.+2.*v*r)/s12) + sum(tf.square(v)/s12)*sum(tf.square(r)*s02) # trace term
 80 |   b=sum(tf.square(mu1-mu0)/s12) # difference-of-means term
 81 |   c=2.*(sum(N.logsigma-Q.logsigma) - tf.log(1.+sum(v*r))) # ratio-of-determinants term. 
 82 |   return 0.5*(a+b-k+c)#, a, b, c
 83 | 
 84 | def sampleNormal(mu,sigma):
 85 |   # diagonal stdev
 86 |   n01=tf.random_normal(sigma.get_shape(), mean=0, stddev=1)
 87 |   return mu+sigma*n01
 88 | 
 89 | def sampleQ_phi(h_enc,share=None):
 90 |   with tf.variable_scope("sampleQ_phi",reuse=share):
 91 |     mu,log_sigma=tf.split(1,2,linear(h_enc,z_dim*2)) # diagonal stdev values
 92 |     sigma=tf.exp(log_sigma)
 93 |     return sampleNormal(mu,sigma), NormalDistribution(mu, sigma, log_sigma)
 94 | 
 95 | def transition(h,share=None):
 96 |   # compute A,B,o linearization matrices
 97 |   with tf.variable_scope("trans",reuse=share):
 98 |     for l in range(2):
 99 |       h=ReLU(h,100,"l"+str(l))
100 |     with tf.variable_scope("A"):
101 |       v,r=tf.split(1,2,linear(h,z_dim*2))
102 |       v1=tf.expand_dims(v,-1) # (batch, z_dim, 1)
103 |       rT=tf.expand_dims(r,1) # batch, 1, z_dim
104 |       I=tf.diag([1.]*z_dim)
105 |       A=(I+tf.batch_matmul(v1,rT)) # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted) 
106 |     with tf.variable_scope("B"):
107 |       B=linear(h,z_dim*u_dim)
108 |       B=tf.reshape(B,[-1,z_dim,u_dim])
109 |     with tf.variable_scope("o"):
110 |       o=linear(h,z_dim)
111 |     return A,B,o,v,r
112 | 
113 | def sampleQ_psi(z,u,Q_phi,share=None):
114 |   A,B,o,v,r=transition(z,share)
115 |   with tf.variable_scope("sampleQ_psi"):
116 |     mu_t=tf.expand_dims(Q_phi.mu,-1) # batch,z_dim,1
117 |     Amu=tf.squeeze(tf.batch_matmul(A,mu_t), [-1])
118 |     u=tf.expand_dims(u,-1) # batch,u_dim,1
119 |     Bu=tf.squeeze(tf.batch_matmul(B,u),[-1])
120 |     Q_psi=NormalDistribution(Amu+Bu+o,Q_phi.sigma,Q_phi.logsigma, v, r)
121 |     # the actual z_next sample is generated by deterministically transforming z_t
122 |     z=tf.expand_dims(z,-1)
123 |     Az=tf.squeeze(tf.batch_matmul(A,z),[-1])
124 |     z_next=Az+Bu+o
125 |     return z_next,Q_psi#,(A,B,o,v,r) # debugging
126 | 
127 | def decode(z,share=None):
128 |   with tf.variable_scope("decoder",reuse=share):
129 |     for l in range(2):
130 |       z=ReLU(z,200,"l"+str(l))
131 |     return linear(z,x_dim)
132 | 
133 | def binary_crossentropy(t,o):
134 |     return t*tf.log(o+eps) + (1.0-t)*tf.log(1.0-o+eps)
135 | 
136 | def recons_loss(x,x_recons):
137 |   with tf.variable_scope("Lx"):
138 |     ll=tf.reduce_sum(binary_crossentropy(x,x_recons),1) # sum across features
139 |     return -ll # negative log-likelihood
140 | 
141 | def latent_loss(Q):
142 |   with tf.variable_scope("Lz"):
143 |     mu2=tf.square(Q.mu)
144 |     sigma2=tf.square(Q.sigma)
145 |     # negative of the upper bound of posterior
146 |     return -0.5*tf.reduce_sum(1+2*Q.logsigma-mu2-sigma2,1)
147 | 
148 | def sampleP_theta(h_dec,share=None):
149 |   # sample x from bernoulli distribution with means p=W(h_dec)
150 |   with tf.variable_scope("P_theta",reuse=share):
151 |     p=linear(h_dec,x_dim)
152 |     return tf.sigmoid(p) # mean of bernoulli distribution
153 | 
154 | # BUILD NETWORK
155 | batch_size=64
156 | 
157 | 
158 | # modified
159 | T=10 # sequence len
160 | 
161 | # inputs
162 | xs=[tf.placeholder(tf.float32,[batch_size, x_dim])]*T
163 | us=[tf.placeholder(tf.float32, [batch_size, u_dim])]*(T-1)
164 | 
165 | x_recons=[0]*(T-1) # we'll apply VAE recons to the last one in the sequence as well
166 | x_predicts=[0]*(T-1)
167 | 
168 | # T-1 recons and prediction losses
169 | losses=[0]*(T-1)
170 | kl_losses=[0]*(T-1)
171 | lbound_losses=[0]*(T-1)
172 | print(T)
173 | # arrays for storing losses
174 | DO_SHARE=None # None on the first pass so variables get created; switched to True below so later calls reuse them
175 | for t in range(T-1):
176 |   if t==0:
177 |     x=xs[0] # we are given the first one
178 |   else:
179 |     x=x_predicts[t-1] # re-use our hallucinations to predict
180 |   # alternative input (disabled): feed the true observation instead, x=xs[t]
181 |   h_enc=encode(x,share=DO_SHARE)
182 |   z,Q_phi=sampleQ_phi(h_enc,share=DO_SHARE)
183 |   # reconstitute x_t
184 |   h_dec=decode(z,share=DO_SHARE)
185 |   x_recons[t]=sampleP_theta(h_dec,share=DO_SHARE)
186 |   # linearized dynamics, predict
187 |   # compute linearized dynamics, predict new latent state
188 |   z_predict,Q_psi=sampleQ_psi(z,us[t],Q_phi,share=DO_SHARE)
189 |   DO_SHARE=True # share from now on
190 |   h_dec_predict=decode(z_predict,share=DO_SHARE)
191 |   x_predicts[t]=sampleP_theta(h_dec_predict,share=DO_SHARE)
192 |   # encode next true data
193 |   h_enc_next=encode(xs[t+1],share=DO_SHARE)
194 |   z_next,Q_phi_next=sampleQ_phi(h_enc_next,share=DO_SHARE)
195 | 
196 |   # add loss terms
197 |   L_x=recons_loss(xs[t],x_recons[t]) # reconstruction is scored against the true observation at step t
198 |   L_x_next=recons_loss(xs[t+1],x_predicts[t]) # prediction is scored against the true observation at step t+1
199 |   L_z=latent_loss(Q_phi)
200 |   L_bound=L_x+L_x_next+L_z
201 |   KL=KLGaussian(Q_psi,Q_phi_next)
202 |   lambd=1.0 # weight of the KL term
203 |   
204 |   lbound_losses[t]=tf.reduce_mean(L_bound)
205 |   kl_losses[t]=tf.reduce_mean(lambd*KL)
206 |   losses[t]=tf.reduce_mean(L_bound+lambd*KL) # average loss over minibatch to single scalar
207 | 
208 | 
209 | loss=tf.add_n(losses) # sum all the losses to compute the sequence loss
210 | 
211 | for v in tf.all_variables():
212 |     print("%s : %s" % (v.name, v.get_shape()))
213 | 
214 | with tf.variable_scope("Optimizer"):
215 |   learning_rate=1e-4
216 |   optimizer=tf.train.AdamOptimizer(learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1
217 |   train_op=optimizer.minimize(loss)
218 | 
219 | saver = tf.train.Saver(max_to_keep=100) # keep all checkpoint files
220 | 
221 | ckpt_file="/ltmp/e2c-plane"
222 | 
223 | # summaries
224 | tf.scalar_summary("loss", loss)
225 | 
226 | # monitor losses in case behavior of Lz_8 differs drastically from Lz_0 prediction
227 | #for t in range(T-1):
228 | #  tf.scalar_summary("Lb_%d" % (t), lbound_losses[t])
229 | #  tf.scalar_summary("Lz_%d" % (t), kl_losses[t])
230 | #  tf.scalar_summary("loss_%d" % (t), losses[t])
231 | 
232 | # tf.scalar_summary("L_x", tf.reduce_mean(L_x))
233 | # tf.scalar_summary("L_x_next", tf.reduce_mean(L_x_next))
234 | # tf.scalar_summary("L_z", tf.reduce_mean(L_z))
235 | # tf.scalar_summary("KL",tf.reduce_mean(KL))
236 | all_summaries = tf.merge_all_summaries()
237 | 
# TRAIN
# Script entry point: restores a checkpoint, shows one sampled sequence as a
# sanity check, then runs the training loop with periodic logging/checkpointing.
if __name__=="__main__":
  init=tf.initialize_all_variables()
  sess=tf.InteractiveSession()
  sess.run(init)
  # WRITER
  writer = tf.train.SummaryWriter("/ltmp/e2c", sess.graph_def)

  dataset=PlaneData("data/plane1.npz","data/env1.png")
  dataset.initialize()

  # resume training
  # NOTE(review): the restore is unconditional, so this checkpoint path must
  # already exist for the script to get past this line.
  saver.restore(sess, "/ltmp/e2c-plane-single1.ckpt")

  # test to make sure samples are actually trajectories

  def getimgs(x):
    """Tile one image per element of ``x`` side by side on a padded canvas.

    Row 20 of each ``x[t]`` is reshaped to (A, B) and pasted into its own cell;
    cells are separated by a 1-pixel border of value 0.5.
    """
    padsize=1
    padval=.5
    ph=B+2*padsize
    pw=A+2*padsize
    img=np.ones((ph,len(x)*pw))*padval
    for t in range(len(x)):
      startc=t*pw+padsize
      # index 20 picks a single, fixed row of x[t] (presumably one batch element — TODO confirm)
      img[padsize:padsize+B, startc:startc+A]=x[t][20,:].reshape((A,B))
    return img
  (x_vals,u_vals)=dataset.sample_seq(batch_size,T)
  plt.matshow(getimgs(x_vals),cmap=plt.cm.gray,vmin=0,vmax=1)
  plt.show()

  train_iters=2e5 # 200K iters
  for i in range(int(train_iters)):
    (x_vals,u_vals)=dataset.sample_seq(batch_size,T,replace=False)
    # feed one array per time-step placeholder: T observations, T-1 controls
    feed_dict={}
    for t in range(T):
      feed_dict[xs[t]] = x_vals[t]
    for t in range(T-1):
      feed_dict[us[t]] = u_vals[t]

    results=sess.run([loss,all_summaries,train_op],feed_dict)
    if i%1000==0:
      print("iter=%d : Loss: %f" % (i,results[0]))
      # summaries are only written after the first 2000 iterations
      if i>2000:
        writer.add_summary(results[1], i)
    # checkpoint every 100 iterations during the first 1000, then every 1000
    if (i%100==0 and i < 1000) or (i % 1000 == 0):
      saver.save(sess,ckpt_file+"-%05d"%(i)+".ckpt")

  # save variables
  #print("Model saved in file: %s" % saver.save(sess,ckpt_file+".ckpt"))

  sess.close()
289 | 


--------------------------------------------------------------------------------
/env0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ericjang/e2c/26e98223e3c6fa2eadfab2fbf80a3801d91fdb2a/env0.png


--------------------------------------------------------------------------------
/env1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ericjang/e2c/26e98223e3c6fa2eadfab2fbf80a3801d91fdb2a/env1.png


--------------------------------------------------------------------------------
/env_blank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ericjang/e2c/26e98223e3c6fa2eadfab2fbf80a3801d91fdb2a/env_blank.png


--------------------------------------------------------------------------------
/plane_data2.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | import numpy as np
  6 | from numpy.random import randint
  7 | import os
  8 | from dataset import DataSet
  9 | 
 10 | num_t=80 # number of trajectories (i.e. number of initial states)
 11 | T=1000 # length of each trajectory sequence
 12 | u_dim=2 # control (action) dimension
 13 | w,h=40,40
 14 | x_dim=w*h
 15 | rw=1 # robot half-width
 16 | 
 17 | def get_params():
 18 |   return x_dim,u_dim,T
 19 | 
 20 | class PlaneData(DataSet):
 21 |   def __init__(self, fname, env_file):
 22 |     super(PlaneData, self).__init__()
 23 |     self.cache=fname
 24 |     self.initialized=False
 25 |     self.im=plt.imread(env_file) # grayscale
 26 |     self.params=(x_dim,u_dim,T)
 27 | 
 28 |   def is_colliding(self,p):
 29 |     if np.any([p-rw<0, p+rw>=w]):
 30 |       return True
 31 |     # check robot body overlap with obstacle field
 32 |     return np.mean(self.im[p[0]-rw:p[0]+rw+1, p[1]-rw:p[1]+rw+1]) > 0.05
 33 | 
 34 |   def compute_traj(self, max_dist=1):
 35 |     # computes P,U data for single trajectory
 36 |     # all P,U share the same environment obstacles.png
 37 |     P=np.zeros((T,2),dtype=np.int) # r,c position
 38 |     U=np.zeros((T,u_dim),dtype=np.int)
 39 |     P[0,:]=[rw,randint(rw,w-rw)] # initial location
 40 |     for t in range(1,T):
 41 |       p=np.copy(P[t-1,:])
 42 |       # dr direction
 43 |       d=randint(-1,2) # direction
 44 |       nsteps=randint(max_dist+1)
 45 |       dr=d*nsteps # applied control
 46 |       for i in range(nsteps):
 47 |         p[0]+=d
 48 |         if self.is_colliding(p):
 49 |           p[0]-=d
 50 |           break
 51 |       # dc direction
 52 |       d=randint(-1,2) # direction
 53 |       nsteps=randint(max_dist+1)
 54 |       dc=d*nsteps # applied control
 55 |       for i in range(nsteps):
 56 |         p[1]+=d
 57 |         if self.is_colliding(p):
 58 |           p[1]-=d # step back
 59 |           break
 60 |       P[t,:]=p
 61 |       U[t,:]=[dr,dc]
 62 |     return P,U
 63 | 
 64 |   def initialize(self):
 65 |     if os.path.exists(self.cache):
 66 |       self.load()
 67 |     else:
 68 |       self.precompute()
 69 |     self.initialized=True
 70 | 
 71 |   def compute_data(self):
 72 |     # compute multiple trajectories
 73 |     P=np.zeros((num_t,T,2),dtype=np.int)
 74 |     U=np.zeros((num_t,T,u_dim),dtype=np.int)
 75 |     for i in range(num_t):
 76 |       P[i,:,:], U[i,:,:] = self.compute_traj(max_dist=2)
 77 |     return P,U
 78 | 
 79 |   def precompute(self):
 80 |     print("Precomputing P,U...")
 81 |     self.P, self.U = self.compute_data()
 82 | 
 83 |   def save(self):
 84 |     print("Saving P,U...")
 85 |     np.savez(self.cache, P=self.P, U=self.U)
 86 | 
 87 |   def load(self):
 88 |     print("Loading P,U from %s..." % (self.cache))
 89 |     D=np.load(self.cache)
 90 |     self.P, self.U = D['P'], D['U']
 91 | 
 92 |   def getXp(self,p):
 93 |     # return image X given true state p (position) of robot
 94 |     x=np.copy(self.im)
 95 |     x[p[0]-rw:p[0]+rw+1, p[1]-rw:p[1]+rw+1]=1. # robot is white on black background
 96 |     return x.flat
 97 | 
 98 |   def getX(self,i,t):
 99 |     # i=trajectory index, t=time step
100 |     return self.getXp(self.P[i,t,:])
101 | 
102 |   def getXTraj(self,i):
103 |     # i=traj index
104 |     X=np.zeros((T,x_dim),dtype=np.float)
105 |     for t in range(T):
106 |       X[t,:]=self.getX(i,t)
107 |     return X
108 | 
109 |   def sample(self, batch_size):
110 |     """
111 |     computes (x_t,u_t,x_{t+1}) pair
112 |     returns tuple of 3 ndarrays with shape
113 |     (batch,x_dim), (batch, u_dim), (batch, x_dim)
114 |     """
115 |     if not self.initialized:
116 |       raise ValueError("Dataset not loaded - call PlaneData.initialize() first.")
117 |     traj=randint(0,num_t,size=batch_size) # which trajectory
118 |     tt=randint(0,T-1,size=batch_size) # time step t for each batch
119 |     X0=np.zeros((batch_size,x_dim))
120 |     U0=np.zeros((batch_size,u_dim),dtype=np.int)
121 |     X1=np.zeros((batch_size,x_dim))
122 |     for i in range(batch_size):
123 |       t=tt[i]
124 |       p=self.P[traj[i], t, :]
125 |       X0[i,:]=self.getX(traj[i],t)
126 |       X1[i,:]=self.getX(traj[i],t+1)
127 |       U0[i,:]=self.U[traj[i], t, :]
128 |     return (X0,U0,X1)
129 | 
130 |   def getPSpace(self):
131 |     """
132 |     Returns all possible positions of agent
133 |     """
134 |     ww=h-2*rw
135 |     P=np.zeros((ww*ww,2)) # max possible positions
136 |     i=0
137 |     p=np.array([rw,rw]) # initial location
138 |     for dr in range(ww):
139 |       for dc in range(ww):
140 |         if not self.is_colliding(p+np.array([dr,dc])):
141 |           P[i,:]=p+np.array([dr,dc])
142 |           i+=1
143 |     return P[:i,:]
144 | 
145 |   def getXPs(self, Ps):
146 |     X=np.zeros((Ps.shape[0],x_dim))
147 |     for i in range(Ps.shape[0]):
148 |       X[i,:]=self.getXp(Ps[i,:])
149 |     return X
150 | 
# Demo entry point: builds (or loads) the dataset, writes it to the cache file,
# then renders a GIF of sampled (x_t, x_{t+1}) pairs and animates trajectory 0.
if __name__ == "__main__":
  import matplotlib.animation as animation
  p=PlaneData("plane2.npz","env1.png")
  p.initialize()
  p.save()
  im=p.im
  A,B=im.shape

  # show sample tuples
  if True:
    fig, aa = plt.subplots(1,2)
    x0,u0,x1=p.sample(2)
    m1=aa[0].matshow(x0[0,:].reshape(w,w), cmap=plt.cm.gray, vmin = 0., vmax = 1.)
    aa[0].set_title('x(t)')
    m2=aa[1].matshow(x1[0,:].reshape(w,w), cmap=plt.cm.gray, vmin = 0., vmax = 1.)
    aa[1].set_title('x(t+1), u=(%d,%d)' % (u0[0,0],u0[0,1]))
    fig.tight_layout()
    def updatemat2(t):
      # draws a fresh random pair per frame; the frame index t is not used and
      # the title set above (with the first pair's control) is not refreshed
      x0,u0,x1=p.sample(2)
      m1.set_data(x0[0,:].reshape(w,w))
      m2.set_data(x1[0,:].reshape(w,w))
      return m1,m2

    anim=animation.FuncAnimation(fig, updatemat2, frames=100, interval=1000, blit=True, repeat=True)

    # the GIF is written through the 'imagemagick' animation writer
    Writer = animation.writers['imagemagick'] # animation.writers.avail
    writer = Writer(fps=1, metadata=dict(artist='Me'), bitrate=1800)
    anim.save('sample_obs.gif', writer=writer)

  #show trajectory
  if True:
    fig, ax = plt.subplots()
    X=p.getXTraj(0)
    mat=ax.matshow(X[0,:].reshape((A,B)), cmap=plt.cm.gray, vmin = 0., vmax = 1.)
    def updatemat(t):
      # frame t shows the image of trajectory 0 at time step t
      mat.set_data(X[t,:].reshape((A,B)))
      return mat,
    anim = animation.FuncAnimation(fig, updatemat, frames=T-1, interval=30, blit=True, repeat=True)
    plt.show()
190 | 


--------------------------------------------------------------------------------
/tests.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | test functions like KLGaussian to make sure you implemented correctly
 4 | """
 5 | 
 6 | 
 7 | import numpy as np
 8 | 
 9 | # ground truth implementation
10 | from divergence import gau_kl
11 | 
# Compare a reference Gaussian KL implementation (gau_kl) with the TensorFlow
# KLGaussian on the same pair of distributions; both results are only printed,
# nothing is asserted.
pm=np.array([1.,1.,1.],dtype=np.float32) # true
pv=np.array([0.1,0.3,0.5],dtype=np.float32) # diagonal covariance
qm=np.array([0.,0.,0.],dtype=np.float32)
qv=np.array([1.,1.,1.],dtype=np.float32)

# gau_kl is unpacked into four values: the KL and three intermediate terms
KL,a,b,c= gau_kl(pm, pv, qm, qv) # assumes diagonal covariances...
print('KL : %f' % (KL))
print('trace term : %f' % (a))
print('difference of means : %f' % (b))
print('ratio of determinants : %f' % (c))

# my implementation

# NOTE(review): the repository listing shows e2c_plane.py and e2c_seq.py but no
# e2c.py or divergence.py — confirm that these imports resolve.
import tensorflow as tf
from e2c import NormalDistribution, KLGaussian
batch_size=1
z_dim=3

# NOTE: I is built here but not referenced again in this script
I=tf.identity(np.tile(np.eye(z_dim,dtype=np.float32),[batch_size, 1, 1])) # identity matrix (batch_size, z_dim, z_dim)
zero_z=tf.constant(0.,shape=[batch_size,z_dim])

# add the leading batch axis expected by the TensorFlow code
pm=pm.reshape((batch_size,z_dim))
pv=pv.reshape((batch_size,z_dim))
qm=qm.reshape((batch_size,z_dim))
qv=qv.reshape((batch_size,z_dim))

# the distributions take standard deviations, hence sqrt of the variances above
pmu=tf.constant(pm,shape=[batch_size,z_dim])
psigma=tf.constant(np.sqrt(pv))
P=NormalDistribution(pmu,psigma,tf.log(psigma),zero_z,zero_z)

qmu=tf.constant(qm,shape=[batch_size,z_dim])
qsigma=tf.constant(np.sqrt(qv))
Q=NormalDistribution(qmu,qsigma,tf.log(qsigma),zero_z,zero_z)

# sigma0=tf.constant([0.1,0.3,0.5])
# P=NormalDistribution(one_z, sigma0, tf.log(sigma0), zero_z, zero_z)
# Pz=NormalDistribution(zero_z, one_z, zero_z, zero_z, zero_z)# prior on Q_phi = mean 0, unit variance => logsigma=0

# here KLGaussian is expected to return the same four quantities as gau_kl
KL,a,b,c=KLGaussian(P,Q,"tmp")

sess=tf.InteractiveSession()

results=sess.run([KL,a,b,c])
print('KL : %f' % (results[0]))
print('trace term : %f' % (results[1]))
print('difference of means : %f' % (results[2]))
print('ratio of determinants : %f' % (results[3]))

sess.close()
61 | 
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/vae.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | """
  4 | 
  5 | import numpy as np
  6 | import tensorflow as tf
  7 | import matplotlib.pyplot as plt
  8 | import os
  9 | from tensorflow.examples.tutorials import mnist
 10 | 
# A and B are the two sides used to reshape a flat observation back into an image
A=B=40
x_dim=A*B # length of one flattened observation
z_dim=2 # latent dimension

eps=1e-9 # numerical stability (added inside tf.log in binary_crossentropy)
 16 | 
 17 | 
 18 | def orthogonal_initializer(scale = 1.1):
 19 |   ''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
 20 |   '''
 21 |   print('Warning -- You have opted to use the orthogonal_initializer function')
 22 |   def _initializer(shape, dtype=tf.float32):
 23 |     flat_shape = (shape[0], np.prod(shape[1:]))
 24 |     a = np.random.normal(0.0, 1.0, flat_shape)
 25 |     u, _, v = np.linalg.svd(a, full_matrices=False)
 26 |     # pick the one with the correct shape
 27 |     q = u if u.shape == flat_shape else v
 28 |     q = q.reshape(shape) #this needs to be corrected to float32
 29 |     print('you have initialized one orthogonal matrix.')
 30 |     return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
 31 |   return _initializer
 32 | 
 33 | 
 34 | class NormalDistribution(object):
 35 |   """docstring for NormalDistribution"""
 36 |   def __init__(self, mu, sigma, logsigma):
 37 |     super(NormalDistribution, self).__init__()
 38 |     self.mu=mu
 39 |     self.sigma=sigma
 40 |     self.logsigma=logsigma
 41 |     
 42 | def linear(x,output_dim):
 43 |   #w=tf.get_variable("w", [x.get_shape()[1], output_dim], initializer=tf.random_normal_initializer(mean=0.0, stddev=.01)) 
 44 |   w=tf.get_variable("w", [x.get_shape()[1], output_dim], initializer=orthogonal_initializer(1.1))
 45 |   b=tf.get_variable("b", [output_dim], initializer=tf.constant_initializer(0.0))
 46 |   return tf.matmul(x,w)+b
 47 | 
 48 | def ReLU(x,output_dim, scope):
 49 |   with tf.variable_scope(scope):
 50 |     return tf.nn.relu(linear(x,output_dim))
 51 | 
 52 | def encode(x):
 53 |   with tf.variable_scope("encoder"):
 54 |     for l in range(3):
 55 |       x=ReLU(x,150,"l"+str(l))
 56 |     return linear(x,4)
 57 |     #return tf.nn.relu(linear(x,z_dim))
 58 | 
 59 | def sampleNormal(mu,sigma):
 60 |   # note: sigma is diagonal standard deviation, not variance
 61 |   n01=tf.random_normal(mu.get_shape(), mean=0, stddev=1)
 62 |   return mu+sigma*n01
 63 | 
 64 | def sampleQ(h_enc):
 65 |   """
 66 |   Samples Zt ~ normrnd(mu,sigma) via reparameterization trick for normal dist
 67 |   mu is (batch,z_size)
 68 | 
 69 |   """
 70 |   with tf.variable_scope("sampleQ"):
 71 |     with tf.variable_scope("Q"):
 72 |       mu,log_sigma=tf.split(1,2,linear(h_enc,z_dim*2))
 73 |       sigma=tf.exp(log_sigma) # sigma_t, covariance of Q_phi
 74 |     return sampleNormal(mu,sigma), NormalDistribution(mu, log_sigma, sigma)
 75 | 
 76 | def decode(z):
 77 |   # with tf.variable_scope("decoder"):
 78 |   #   return tf.nn.relu(linear(z,x_dim))
 79 |   with tf.variable_scope("decoder"):
 80 |     for l in range(2):
 81 |       z=ReLU(z,200,"l"+str(l))
 82 |     return linear(z,x_dim)
 83 | 
 84 | def binary_crossentropy(t,o):
 85 |     return -(t*tf.log(o+eps) + (1.0-t)*tf.log(1.0-o+eps))
 86 | 
 87 | def recons_loss(x,x_recons):
 88 |   with tf.variable_scope("Lx"):
 89 |     return tf.reduce_sum(binary_crossentropy(x,x_recons),1) # sum across features
 90 | 
 91 | def latent_loss(Q):
 92 |   # KL distribution between distribution in latent space and some prior
 93 |   # (regularizer)
 94 |   with tf.variable_scope("Lz"):
 95 |     mu2=tf.square(Q.mu)
 96 |     sigma2=tf.square(Q.sigma)
 97 |     #return 0.5*tf.reduce_sum(1.+mu2+sigma2-2.*Q.logsigma,1) # sum across features
 98 |     # negative of the upper bound of posterior
 99 |     return -0.5*tf.reduce_sum(1+2*Q.logsigma-mu2-sigma2,1)
100 | 
def sampleP_theta(h_dec):
  """Return the Bernoulli means ``sigmoid(linear(h_dec, x_dim))`` (scope "P_theta")."""
  with tf.variable_scope("P_theta"):
    logits = linear(h_dec, x_dim)
    means = tf.sigmoid(logits) # mean of bernoulli distribution
    return means
106 | 
# BUILD NETWORK
# Everything below runs at import time: graph construction, training on the
# plane dataset, checkpointing, and a final reconstruction plot.
batch_size=64
x=tf.placeholder(tf.float32, [batch_size, x_dim])
h_enc=encode(x) # encoded space
z,Q=sampleQ(h_enc) # z - latent space
#h_dec=decode(h_enc) # regular autoencoder
h_dec=decode(z) # decoded space
x_recons=sampleP_theta(h_dec) # original space

with tf.variable_scope("Loss"):
  L_x=recons_loss(x,x_recons)
  L_z=latent_loss(Q)
  # only the reconstruction term is optimised; L_z is computed and reported
  # in the summaries but is not part of the loss
  loss=tf.reduce_mean(L_x)
  #loss=tf.reduce_mean(L_x+L_z) # average over minibatch -> single scalar

with tf.variable_scope("Optimizer"):
  learning_rate=1e-4
  optimizer=tf.train.AdamOptimizer(learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1
  train_op=optimizer.minimize(loss)

saver = tf.train.Saver() # saves variables learned during training

# summaries
tf.scalar_summary("loss", loss)
tf.scalar_summary("L_x", tf.reduce_mean(L_x))
tf.scalar_summary("L_z", tf.reduce_mean(L_z))
all_summaries = tf.merge_all_summaries()


# TRAIN
init=tf.initialize_all_variables()
sess=tf.InteractiveSession()
sess.run(init)
# WRITER
writer = tf.train.SummaryWriter("/ltmp/vae", sess.graph_def)

# PLANE TASK
ckpt_file="vaemodel_plane.ckpt"
from plane_data2 import PlaneData
dataset=PlaneData("plane.npz","env0.png")
dataset.initialize()

# resume training
#saver.restore(sess, ckpt_file)

# # TRAIN
if True:
  train_iters=50000
  for i in range(int(train_iters)):
    # only the current observation is fed; the control and next observation are unused here
    (x_val,u_val,x_next_val)=dataset.sample(batch_size)
    #x_val=dataset.sample(batch_size)
    feed_dict={
      x:x_val
    }
    results=sess.run([loss,all_summaries,train_op],feed_dict)
    writer.add_summary(results[1], i) # write summary data to disk
    if i%1000==0:
      print("iter=%d : Loss: %f" % (i,results[0]))
  # save variables
  print("Model saved in file: %s" % saver.save(sess,ckpt_file))

# Reload the checkpoint and plot ten inputs (left column) next to their
# reconstructions (right column).
if True:
  saver.restore(sess, ckpt_file)
  (x_val,u_val,x_next_val)=dataset.sample(batch_size)
  #x_val=dataset.sample(batch_size)
  xr=sess.run(x_recons,{x:x_val})
  fig,arr=plt.subplots(10,2)
  for i in range(10):
    arr[i,0].matshow(x_val[i,:].reshape((A,B)),cmap=plt.cm.gray, vmin=0, vmax=1)
    arr[i,1].matshow(xr[i,:].reshape((A,B)),cmap=plt.cm.gray, vmin=0, vmax=1)
  plt.show()

sess.close()


--------------------------------------------------------------------------------
/viz_results.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Quick-and-dirty visualization scripts for a variety of tasks.    
  3 | """
  4 | 
  5 | import sequential_e2c as e2c
  6 | #import e2c_plane_z as e2c
  7 | 
  8 | import matplotlib.pyplot as plt
  9 | import tensorflow as tf
 10 | import numpy as np
 11 | from data.plane_data2 import PlaneData, get_params
 12 | 
 13 | import ipdb as pdb
 14 | 
 15 | def show_recons_samples(sess, ckptfile):
 16 |   # visualize sample reconstructions
 17 |   e2c.saver.restore(sess, ckptfile) # restore variable values
 18 |   dataset=PlaneData("data/plane1.npz","data/env1.png")
 19 |   dataset.initialize()
 20 |   (x_val,u_val,x_next_val)=dataset.sample(e2c.batch_size)
 21 |   xr,xp=sess.run([e2c.x_recons, e2c.x_predict],feed_dict={e2c.x:x_val,e2c.u:u_val,e2c.x_next:x_next_val})
 22 |   #xr,xp=sess.run([e2c.x_recons0, e2c.x_predict0],feed_dict={e2c.x0:x_val,e2c.u0:u_val,e2c.x1:x_next_val})
 23 |   A,B=e2c.A,e2c.B
 24 |   def getimgs(x,xnext):
 25 |     padsize=1
 26 |     padval=.5
 27 |     ph=B+2*padsize
 28 |     pw=A+2*padsize
 29 |     img=np.ones((10*ph,2*pw))*padval
 30 |     for i in range(10):
 31 |       startr=i*ph+padsize
 32 |       img[startr:startr+B,padsize:padsize+A]=x[i,:].reshape((A,B))
 33 |     for i in range(10):
 34 |       startr=i*ph+padsize
 35 |       img[startr:startr+B,pw+padsize:pw+padsize+A]=xnext[i,:].reshape((A,B))
 36 |     return img
 37 |   fig,arr=plt.subplots(1,2)
 38 |   arr[0].matshow(getimgs(x_val,x_next_val),cmap=plt.cm.gray,vmin=0,vmax=1)
 39 |   arr[0].set_title('Data')
 40 |   arr[1].matshow(getimgs(xr,xp),cmap=plt.cm.gray,vmin=0,vmax=1)
 41 |   arr[1].set_title('Reconstruction')
 42 |   plt.show()
 43 | 
 44 | def show_recons_seq(sess, ckptfile):
 45 |   e2c.saver.restore(sess, ckptfile) # restore variable values
 46 |   dataset=PlaneData("data/plane1.npz","data/env1.png")
 47 |   dataset.initialize()
 48 |   T=e2c.T
 49 |   print(T)
 50 |   (x_vals,u_vals)=dataset.sample_seq(e2c.batch_size,T)
 51 |   feed_dict={}
 52 |   for t in range(T):
 53 |     feed_dict[e2c.xs[t]] = x_vals[t]
 54 |   for t in range(T-1):
 55 |     feed_dict[e2c.us[t]] = u_vals[t]
 56 |   
 57 |   fetches=e2c.x_recons + e2c.x_predicts
 58 |   results=sess.run(fetches,feed_dict)
 59 |   xr=results[:T-1]
 60 |   xp=results[T-1:]
 61 |   A,B=e2c.A,e2c.B
 62 |   def getimgs(x):
 63 |     padsize=1
 64 |     padval=.5
 65 |     ph=B+2*padsize
 66 |     pw=A+2*padsize
 67 |     img=np.ones((ph,len(x)*pw))*padval
 68 |     for t in range(len(x)):
 69 |       startc=t*pw+padsize
 70 |       img[padsize:padsize+B, startc:startc+A]=x[t][1,:].reshape((A,B))
 71 |     return img
 72 |   fig,arr=plt.subplots(3,1)
 73 |   arr[0].matshow(getimgs(x_vals),cmap=plt.cm.gray,vmin=0,vmax=1)
 74 |   arr[0].set_title('X')
 75 |   arr[1].matshow(getimgs(xr),cmap=plt.cm.gray,vmin=0,vmax=1)
 76 |   arr[1].set_title('Reconstruction')
 77 |   arr[2].matshow(getimgs(xp),cmap=plt.cm.gray,vmin=0,vmax=1)
 78 |   arr[2].set_title('Prediction')
 79 |   plt.show()
 80 | 
 81 | def viz_z(sess, ckptfile):
 82 |   e2c.saver.restore(sess,ckptfile) # restore variable values
 83 |   dataset=PlaneData("data/plane1.npz","data/env1.png")
 84 |   Ps,NPs=dataset.getPSpace()
 85 |   batch_size=e2c.batch_size
 86 |   n0=NPs.shape[0]
 87 |   if False:
 88 |     Ps=np.vstack((Ps,NPs))
 89 |   xy=np.zeros([Ps.shape[0], 2])
 90 |   xy[:,0]=Ps[:,1]
 91 |   xy[:,1]=20-Ps[:,0] # for the purpose of computing theta, map centered @ origin
 92 |   Zs=np.zeros([Ps.shape[0], e2c.z_dim])
 93 | 
 94 |   theta=np.arctan(xy[:,1]/xy[:,0])
 95 |   for i in range(Ps.shape[0] // batch_size):
 96 |     print("batch %d" % i)
 97 |     x_val=dataset.getXPs(Ps[i*batch_size:(i+1)*batch_size,:])
 98 |     Zs[i*batch_size:(i+1)*batch_size,:]=sess.run(e2c.z, {e2c.x:x_val})
 99 |   # last remaining points may not fit precisely into 1 minibatch.
100 |   x_val=dataset.getXPs(Ps[-batch_size:,:])
101 |   Zs[-batch_size:,:]=sess.run(e2c.z, {e2c.x:x_val})
102 | 
103 |   if False:
104 |     theta[-n0:]=1;
105 | 
106 |   fig,arr=plt.subplots(1,2)
107 |   arr[0].scatter(Ps[:,1], 40-Ps[:,0], c=(np.pi+theta)/(2*np.pi))
108 |   arr[0].set_title('True State Space')
109 |   arr[1].scatter(Zs[:,0],Zs[:,1], c=(np.pi+theta)/(2*np.pi))
110 |   arr[1].set_title('Latent Space Z')
111 |   #plt.show()
112 |   return fig
113 |  
def viz_z_unfold(sess, cpktprefix):
  """Save one latent-space figure per checkpoint ``<cpktprefix>-<step>.ckpt``.

  Steps run from 0 up to (but not including) 100000 in increments of 100.
  Each figure is written next to its checkpoint as ``<cpktprefix>-<step>.png``
  and closed right away, so the loop does not accumulate open figures.
  """
  d=100 # interval
  for step in range(0, int(1e5), d):
    f="%s-%05d" % (cpktprefix,step)
    ckptfile=f+".ckpt"
    print(ckptfile)
    fig=viz_z(sess,ckptfile)
    fig.suptitle('%d'%(step))
    fig.savefig(f+".png")
    # pyplot keeps every figure alive until it is closed explicitly
    plt.close(fig)
    # combine with convert -delay 10 -loop 0 e2c-plane-*.png out.gif
    # then reduce size using gifsicle -O3-colors 256 < out.gif > new.gif
  print('done!')
126 | 
# Script entry point: draw the latent-space scatter and the sequence
# reconstruction for one checkpoint, then show the open figures.
if __name__=="__main__":
  sess = tf.InteractiveSession()
  ckpt = "/ltmp/e2c-plane-199000.ckpt"
  #viz_z_unfold(sess, "/ltmp/e2c-plane")
  fig = viz_z(sess, ckpt)
  #show_recons_samples(sess,"/ltmp/e2c-plane-186000.ckpt")
  show_recons_seq(sess, ckpt)
  plt.show()
  sess.close()


--------------------------------------------------------------------------------