├── .gitignore
├── LICENSE
├── README.md
├── data_reader.py
└── test_data_reader.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Josh Tobin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Datasets used to train Generative Query Networks with Epipolar Cross Attention (E-GQNs) in the 'Geometry-Aware Neural Rendering' paper

This repo is based on the dataset loading code from the GQN datasets found
[here](https://github.com/deepmind/gqn-datasets). The code in this repo can be used to load some of those datasets, either streamed
directly from Google Cloud Storage or from a local copy; follow the instructions in the original repo to download them.

This code can be used to load the following datasets:

+ **rooms_ring_camera** from the original GQN paper
+ **rooms_free_camera** from the original GQN paper
+ **jaco** from the original GQN paper
+ **shepard_metzler_7_parts** from the original GQN paper
+ **openai_block**. A ShadowHand robot with a random, physically sensible finger configuration is placed in the middle of the scene. A lettered cube is placed in the hand with a random orientation.
  For each example, the appearance of the scene is changed by randomizing the lighting and the textures of all bodies in the scene. This dataset is based on the one from the [HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/)
  gym environment.
+ **disco_humanoid**. A humanoid from the [Humanoid-v2](https://gym.openai.com/envs/Humanoid-v2/) gym environment is placed in the middle of the scene. All of its joints are configured randomly.
  For each example, the appearance of the scene is changed by randomizing the lighting and the textures of all bodies in the scene.
+ **rooms_random_objects**. One to four objects from the [ShapeNet](https://arxiv.org/abs/1512.03012) dataset are randomly oriented and dropped into the scene so they land with a random but physically plausible orientation.
  For each example, the appearance of the scene is changed by randomizing the lighting, the texture of the walls, and the textures of all of the objects.

## Usage example

To stream a dataset directly from Google Cloud Storage:

```python
import tensorflow as tf

from data_reader import rrc_debug  # Or rrc_train, oab_test, etc.

dataset = rrc_debug()
with tf.Session() as sess:
    sess.run(dataset.initializer)
    while True:
        sess.run(dataset.next_batch)  # Yields a new batch on each call
```

If you downloaded the datasets, point the dataset constructors at the folder where they are stored:

```python
dataset = rrc_debug(dataset_root="/path/to/folder_containing_datasets/")
...
```

## Download the datasets

The code is set up to stream the data directly from the Google Cloud Storage buckets without downloading it manually. This is a good option for getting started, or if your training will take place on Google's cloud.

If you are not training on Google Cloud, streaming will probably be slow, and you can download the data with `gsutil cp` instead. The `rooms_ring_camera`, `rooms_free_camera`, `jaco`, and `shepard_metzler_7_parts`
datasets are located [here](https://console.cloud.google.com/storage/browser/gqn-dataset) and the `openai_block`, `disco_humanoid`, and `rooms_random_objects` datasets are located [here](https://console.cloud.google.com/storage/browser/egqn-datasets).

See the `gsutil` [documentation](https://cloud.google.com/storage/docs/gsutil_install) for more information.
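
For example, to make a local copy of the `rooms_ring_camera` dataset, something like the following should work (the destination folder is a placeholder; `-m` parallelizes the copy and `-r` copies the whole directory):

```
gsutil -m cp -r gs://gqn-dataset/rooms_ring_camera /path/to/folder_containing_datasets/
```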

## Troubleshooting

### Loading data fails or hangs

If you see an error message like:

```
The operation failed and will be automatically retried in 1.38118 seconds (attempt 1 out of 10), caused by: Unavailable: Error executing an HTTP request (HTTP response code 0, error code 6, error message 'Couldn't resolve host 'metadata'')
```

then make sure you are logged in to gcloud by running `gcloud auth application-default login`.

### Other TensorFlow issues

This code was tested with TensorFlow 1.13.1; try using that version.
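
For example (shown for the CPU build; `tensorflow-gpu==1.13.1` is the GPU equivalent):

```
pip install tensorflow==1.13.1
```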

## Reference

Tobin, Josh, et al. "Geometry-Aware Neural Rendering." *Advances in Neural Information Processing Systems*. 2019.

--------------------------------------------------------------------------------
/data_reader.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from functools import partial
import logging
from os.path import join

_NUM_CHANNELS = 3

def _convert_frame_data(jpeg_data):
    decoded_frames = tf.image.decode_jpeg(jpeg_data)
    return tf.image.convert_image_dtype(decoded_frames, dtype=tf.uint8)

GQN_DATASET_ROOT = 'gs://gqn-dataset/'
EGQN_DATASET_ROOT = 'gs://egqn-datasets/'

class Dataset:
    def __init__(self, tf_dataset, batch_size=64,
                 name='dataset'):
        self.name = name
        self.batch_size = batch_size
        self._dataset = tf_dataset
        self._iterator = self._dataset.make_initializable_iterator()
        self.initializer = self._iterator.initializer
        self.next_batch = self._iterator.get_next()

class GQNDataset(Dataset):
    def __init__(self, dataset_path,
                 dataset_root=GQN_DATASET_ROOT,
                 name="gqn_dataset",
                 batch_size=64,
                 sequence_size=10,
                 context_size=10,
                 image_size=64,
                 fov=50.,
                 num_camera_params=5):
        self._dataset_root = dataset_root
        self._dataset_path = dataset_path
        self._sequence_size = sequence_size
        self._context_size = context_size
        self._image_size = image_size
        self._fov = fov
        self._num_camera_params = num_camera_params

        tf_dataset = self._create_dataset(batch_size)
        super().__init__(tf_dataset, batch_size=batch_size, name=name)

    def to_gpu(self, gpu_id):
        gpu_dset = self._dataset.apply(tf.contrib.data.prefetch_to_device(f'/gpu:{gpu_id}', 2))
        return Dataset(gpu_dset, name=f'{self.name}_{gpu_id}',
                       batch_size=self.batch_size)

    def _create_dataset(self, batch_size):
        dataset_paths = self._get_dataset_paths(self._dataset_root, self._dataset_path)
        dataset_paths_tf = tf.data.Dataset.from_tensor_slices(dataset_paths)
        dataset_paths_tf = dataset_paths_tf.shuffle(len(dataset_paths))

        dataset = dataset_paths_tf.flat_map(tf.data.TFRecordDataset)
        dataset = dataset.map(self._parse_example, num_parallel_calls=10)
        dataset = dataset.shuffle(10000)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        dataset = dataset.prefetch(8)
        return dataset

    def _get_dataset_paths(self, dataset_root, dataset_path):
        if isinstance(dataset_path, str):
            dataset_paths = join(dataset_root, dataset_path)
            if dataset_root.startswith('gs://'):
                dataset_paths = list(sorted(tf.gfile.Glob(dataset_paths)))
            else:
                dataset_paths = [dataset_paths]
        else:
            dataset_paths = [join(dataset_root, dp) for dp in dataset_path]

        return dataset_paths

    def _parse_example(self, example):
        """
        Based on the data loader code from:
        https://github.com/deepmind/gqn-datasets
        """
        feature_map = {
            'frames': tf.FixedLenFeature(
                shape=self._sequence_size, dtype=tf.string),
            'cameras': tf.FixedLenFeature(
                shape=[self._sequence_size * self._num_camera_params],
                dtype=tf.float32)
        }
        example = tf.parse_single_example(example, feature_map)
        indices = self._get_randomized_indices()
        frames = self._preprocess_frames(example, indices)
        cameras = self._preprocess_cameras(example, indices)
        result = {'context_frames': frames[:-1],
                  'context_cameras': cameras[:-1],
                  'query_camera': cameras[-1],
                  'context_fov': tf.ones([self._context_size], dtype=tf.float32) * self._fov,
                  'query_fov': tf.constant(self._fov, dtype=tf.float32),
                  'label': frames[-1]}
        return result

    def _get_randomized_indices(self):
        indices = tf.range(0, self._sequence_size)
        indices = tf.random_shuffle(indices)
        example_size = self._context_size + 1
        indices = tf.slice(indices, begin=[0], size=[example_size])
        return indices

    def _preprocess_frames(self, example, indices):
        frames = example['frames']
        frames = tf.gather(frames, indices, axis=0)
        frames = tf.map_fn(_convert_frame_data, frames, dtype=tf.uint8, back_prop=False)
        im_size = self._image_size
        img_shape = [self._context_size + 1, im_size, im_size, _NUM_CHANNELS]
        frames = tf.reshape(frames, img_shape)
        return frames

    def _preprocess_cameras(self, example, indices):
        pose = example['cameras']
        pose = tf.reshape(pose, [self._sequence_size, self._num_camera_params])
        pose = tf.gather(pose, indices, axis=0)
        # Data from rro and the original GQN datasets are stored as (x, y, z, yaw, pitch)
        if self._num_camera_params == 5:
            pos = pose[:, :3]
            yaw = pose[:, 3:4]
            pitch = pose[:, 4:5]
            # By design there's never any roll
            cameras = tf.concat([
                pos, tf.sin(yaw), tf.cos(yaw), tf.sin(pitch), tf.cos(pitch)], axis=-1)
            return cameras
        # Data from oab and disco is already stored in the 9-parameter format needed downstream
        elif self._num_camera_params == 9:
            return pose
        else:
            raise ValueError(f"Unexpected number of camera params {self._num_camera_params}")
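
# The constructors below use functools.partial to bind each dataset's
# hyperparameters (per-GPU batch size, sequence and context sizes, image
# size, field of view, and camera parameterization). Calling e.g.
# rrc_train() returns a ready-to-use GQNDataset; the *_debug variants
# point at a single shard for quick smoke tests.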
_rrc_dset = partial(GQNDataset,
                    batch_size=9,  # Per gpu
                    sequence_size=10,
                    context_size=4,
                    image_size=64,
                    fov=50.,
                    dataset_root=GQN_DATASET_ROOT,
                    num_camera_params=5)
rrc_train = partial(_rrc_dset, "rooms_ring_camera/train/*",
                    name='rrc_train')
rrc_test = partial(_rrc_dset, "rooms_ring_camera/test/*",
                   name='rrc_test')
rrc_debug = partial(_rrc_dset,
                    "rooms_ring_camera/train/0001-of-2160.tfrecord",
                    name='rrc_debug')

_rfc_dset = partial(GQNDataset,
                    batch_size=9,  # Per gpu
                    sequence_size=10,
                    context_size=4,
                    image_size=128,
                    fov=50.,
                    dataset_root=GQN_DATASET_ROOT,
                    num_camera_params=5)
rfc_train = partial(_rfc_dset,
                    "rooms_free_camera_with_object_rotations/train/*",
                    name='rfc_train')
rfc_test = partial(_rfc_dset, "rooms_free_camera_with_object_rotations/test/*",
                   name='rfc_test')
rfc_debug = partial(_rfc_dset,
                    "rooms_free_camera_with_object_rotations/train/0001-of-2034.tfrecord",
                    name='rfc_debug')

_jaco_dset = partial(GQNDataset,
                     batch_size=9,  # Per gpu
                     sequence_size=11,
                     context_size=4,
                     image_size=64,
                     fov=50.,
                     dataset_root=GQN_DATASET_ROOT,
                     num_camera_params=5)
jaco_train = partial(_jaco_dset,
                     "jaco/train/*",
                     name='jaco_train')
jaco_test = partial(_jaco_dset, "jaco/test/*",
                    name='jaco_test')
jaco_debug = partial(_jaco_dset,
                     "jaco/train/0001-of-3600.tfrecord",
                     name='jaco_debug')

_sm7_dset = partial(GQNDataset,
                    batch_size=9,  # Per gpu
                    sequence_size=15,
                    context_size=4,
                    image_size=64,
                    fov=50.,
                    dataset_root=GQN_DATASET_ROOT,
                    num_camera_params=5)
sm7_train = partial(_sm7_dset,
                    "shepard_metzler_7_parts/train/*",
                    name="sm7_train")
sm7_test = partial(_sm7_dset,
                   "shepard_metzler_7_parts/test/*",
                   name="sm7_test")
sm7_debug = partial(_sm7_dset,
                    "shepard_metzler_7_parts/train/001-of-900.tfrecord",
                    name="sm7_debug")


_oab_dset = partial(GQNDataset,
                    batch_size=8,  # Per gpu
                    sequence_size=4,
                    context_size=3,
                    image_size=128,
                    fov=21.,
                    dataset_root=EGQN_DATASET_ROOT,
                    num_camera_params=9)
oab_train = partial(_oab_dset,
                    [f"openai-block/{i:04}-of-2500.tfrecord" for i in range(1, 2001)],
                    name='oab_train')
oab_test = partial(_oab_dset,
                   [f"openai-block/{i:04}-of-2500.tfrecord" for i in range(2001, 2501)],
                   name='oab_test')
oab_debug = partial(_oab_dset,
                    "openai-block/0001-of-2500.tfrecord",
                    name='oab_debug')


_disco_dset = partial(GQNDataset,
                      batch_size=8,  # Per gpu
                      sequence_size=4,
                      context_size=3,
                      image_size=128,
                      fov=45.,
                      dataset_root=EGQN_DATASET_ROOT,
                      num_camera_params=9)
disco_train = partial(_disco_dset,
                      [f'disco-humanoid/{i:04}-of-2500.tfrecord' for i in range(1, 2001)],
                      name='disco_train')
disco_test = partial(_disco_dset,
                     [f'disco-humanoid/{i:04}-of-2500.tfrecord' for i in range(2001, 2501)],
                     name='disco_test')
disco_debug = partial(_disco_dset,
                      'disco-humanoid/0001-of-2500.tfrecord',
                      name='disco_debug')

_rro_dset = partial(GQNDataset,
                    batch_size=9,  # Per gpu
                    sequence_size=4,
                    context_size=3,
                    image_size=128,
                    fov=50.,
                    dataset_root=EGQN_DATASET_ROOT,
                    num_camera_params=5)
rro_train = partial(_rro_dset,
                    [f'rooms-random-objects/{i:04}-of-1943.tfrecord' for i in range(2, 1601)],
                    name='rro_train')
rro_test = partial(_rro_dset,
                   [f'rooms-random-objects/{i:04}-of-1943.tfrecord' for i in range(1601, 1944)],
                   name='rro_test')
rro_debug = partial(_rro_dset,
                    'rooms-random-objects/0002-of-1943.tfrecord',
                    name='rro_debug')

--------------------------------------------------------------------------------
/test_data_reader.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from data_reader import (rrc_train, rrc_test, rrc_debug,
                         rfc_train, rfc_test, rfc_debug,
                         jaco_train, jaco_test, jaco_debug,
                         sm7_train, sm7_test, sm7_debug,
                         oab_train, oab_test, oab_debug,
                         disco_train, disco_test, disco_debug,
                         rro_train, rro_test, rro_debug)

def render_one_batch(dset):
    with tf.Session() as sess:
        sess.run(dset.initializer)
        batch = sess.run(dset.next_batch)
        return batch
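

# Each test constructs one dataset variant and pulls a single batch, which
# exercises path listing, example parsing, and batching end to end. The
# constructors stream from Google Cloud Storage by default, so these tests
# need network access and gcloud credentials.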
def test_rrc():
    render_one_batch(rrc_train())
    render_one_batch(rrc_test())
    render_one_batch(rrc_debug())

def test_rfc():
    render_one_batch(rfc_train())
    render_one_batch(rfc_test())
    render_one_batch(rfc_debug())

def test_jaco():
    render_one_batch(jaco_train())
    render_one_batch(jaco_test())
    render_one_batch(jaco_debug())

def test_sm7():
    render_one_batch(sm7_train())
    render_one_batch(sm7_test())
    render_one_batch(sm7_debug())

def test_oab():
    render_one_batch(oab_train())
    render_one_batch(oab_test())
    render_one_batch(oab_debug())

def test_disco():
    render_one_batch(disco_train())
    render_one_batch(disco_test())
    render_one_batch(disco_debug())

def test_rro():
    render_one_batch(rro_train())
    render_one_batch(rro_test())
    render_one_batch(rro_debug())

--------------------------------------------------------------------------------