├── .gitignore ├── LICENSE ├── LICENSE_DATASET ├── README.md ├── launch_configs ├── ray_gcp.yaml └── ray_local_cluster.yaml ├── requirements.txt ├── robonet ├── README ├── __init__.py ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── record_dataset.py │ ├── robonet_dataset.py │ ├── util │ │ ├── __init__.py │ │ ├── convert_all.sh │ │ ├── dataset_utils.py │ │ ├── hdf5_2_records.py │ │ ├── hdf5_loader.py │ │ ├── metadata_helper.py │ │ └── tensor_multiplexer.py │ └── variants │ │ ├── __init__.py │ │ ├── annotation_benchmark_dataset.py │ │ └── val_filter_dataset_variants.py ├── inverse_model │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── base_inverse_model.py │ │ ├── deterministic_inverse_model.py │ │ ├── discretized_inverse_model.py │ │ ├── graphs │ │ │ ├── __init__.py │ │ │ ├── base_graph.py │ │ │ └── lstm_baseline.py │ │ └── layers │ │ │ ├── __init__.py │ │ │ └── vgg_pretrain.py │ ├── testing │ │ ├── __init__.py │ │ └── action_inference_interface.py │ └── training │ │ ├── __init__.py │ │ └── inverse_trainable.py ├── video_prediction │ ├── __init__.py │ ├── flow_ops.py │ ├── functional_ops.py │ ├── layers │ │ ├── __init__.py │ │ ├── deterministic_embedding_rnn_cell.py │ │ ├── dnaflow_rnn_cell.py │ │ ├── encoder_layers.py │ │ ├── normalization.py │ │ └── vgg_network.py │ ├── losses.py │ ├── metrics.py │ ├── models │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── deterministc_embedding_utils.py │ │ ├── deterministic_generator.py │ │ └── graphs │ │ │ ├── __init__.py │ │ │ ├── base_graph.py │ │ │ ├── deterministic_graph.py │ │ │ ├── dnaflow_graph.py │ │ │ └── vgg_conv_graph.py │ ├── ops.py │ ├── rnn_ops.py │ ├── testing │ │ ├── __init__.py │ │ └── model_evaluation_interface.py │ ├── training │ │ ├── __init__.py │ │ ├── data_filter.py │ │ ├── finetuning_trainable_interface.py │ │ ├── ray_util │ │ │ ├── __init__.py │ │ │ └── gif_logger.py │ │ ├── trainable_interface.py │ │ └── util.py │ └── utils │ │ ├── __init__.py │ │ ├── encode_img.py │ │ ├── ffmpeg_gif.py │ │ ├── html.py │ │ └── tf_utils.py └── yaml_util.py ├── robonet_experiments ├── classifier_control │ └── params.yaml ├── gpu │ ├── capacity_test │ │ ├── base_model │ │ │ ├── flow.yaml │ │ │ └── noflow.yaml │ │ └── large_model │ │ │ ├── flow.yaml │ │ │ └── noflow.yaml │ ├── finetune_baxter.yaml │ ├── pretrain_models │ │ ├── all_robonet │ │ │ ├── large.yaml │ │ │ └── medium.yaml │ │ └── autograsp │ │ │ ├── large.yaml │ │ │ └── medium.yaml │ └── sawyer_grid_search.yaml ├── inverse_model │ ├── discretized_inverse.yaml │ └── inverse.yaml └── tpu │ ├── capacity_test_flow.yaml │ └── capacity_test_noflow.yaml ├── scripts ├── examples │ ├── create_prediction_gifs.py │ └── test_franka_flow.py ├── templates │ ├── index_template.html │ └── traj_template.html ├── train_model.py ├── train_vpred_tpu.py └── visualize_dataset.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.gif 2 | *.pyc 3 | __pycache__/* 4 | *.egg-info/ 5 | .idea/ 6 | scratch/ 7 | node_modules 8 | bower_components 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Sudeep Dasari, Frederik Ebert, Stephen Tian 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation 
the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RoboNet 2 | Code for loading and manipulating the RoboNet dataset, as well as for training supervised inverse models and video prediction models on the dataset. 3 | 4 | Please refer to the [project wiki](https://github.com/SudeepDasari/RoboNet/wiki) for more detailed documentation. 5 | 6 | If you find the codebase or dataset useful please consider citing our paper. 7 | ``` 8 | @inproceedings{dasari2019robonet, 9 | title={RoboNet: Large-Scale Multi-Robot Learning}, 10 | author={Sudeep Dasari and Frederik Ebert and Stephen Tian and Suraj Nair and Bernadette Bucher and Karl Schmeckpeper and Siddharth Singh and Sergey Levine and Chelsea Finn}, 11 | year={2019}, 12 | eprint={1910.11215}, 13 | archivePrefix={arXiv}, 14 | primaryClass={cs.RO}, 15 | booktitle={CoRL 2019: Volume 100 Proceedings of Machine Learning Research} 16 | } 17 | ``` 18 | 19 | ## Downloading the Dataset 20 | You can find instructions for downloading the dataset on the [project wiki](https://github.com/SudeepDasari/RoboNet/wiki/Getting-Started) as well. All data is provided under the [Creative Commons BY 4.0](https://creativecommons.org/licenses/by/4.0/legalcode) license. 21 | -------------------------------------------------------------------------------- /launch_configs/ray_gcp.yaml: -------------------------------------------------------------------------------- 1 | # An unique identifier for the head node and workers of this cluster. 2 | cluster_name: gcpcluster 3 | 4 | # The minimum number of workers nodes to launch in addition to the head 5 | # node. This number should be >= 0. 6 | min_workers: 4 7 | 8 | # The maximum number of workers nodes to launch in addition to the head 9 | # node. This takes precedence over min_workers. 10 | max_workers: 4 11 | 12 | # The initial number of worker nodes to launch in addition to the head 13 | # node. When the cluster is first brought up (or when it is refreshed with a 14 | # subsequent `ray up`) this number of nodes will be started. 15 | initial_workers: 4 16 | 17 | # This executes all commands on all nodes in the docker container, 18 | # and opens all the necessary ports to support the Ray cluster. 19 | # Empty string means disabled. 20 | docker: 21 | image: "" 22 | container_name: "" # e.g. ray_docker 23 | # container_name: "softlearning" 24 | 25 | # The autoscaler will scale up the cluster to this target fraction of resource 26 | # usage. 
For example, if a cluster of 10 nodes is 100% busy and 27 | # target_utilization is 0.8, it would resize the cluster to 13. This fraction 28 | # can be decreased to increase the aggressiveness of upscaling. 29 | # This value must be less than 1.0 for scaling to happen. 30 | target_utilization_fraction: 0.8 31 | 32 | # If a node is idle for this many minutes, it will be removed. 33 | idle_timeout_minutes: 5 34 | 35 | # Cloud-provider specific configuration. 36 | provider: 37 | type: gcp 38 | region: us-central1 39 | availability_zone: us-central1-a 40 | project_id: visualmpc-210823 41 | 42 | # How Ray will authenticate with newly launched nodes. 43 | auth: 44 | ssh_user: sudeep 45 | # By default Ray creates a new private keypair, but you can also use your own. 46 | # If you do so, make sure to also set "KeyName" in the head and worker node 47 | # configurations below. 48 | # ssh_private_key: /path/to/your/key.pem 49 | 50 | # Provider-specific config for the head node, e.g. instance type. By default 51 | # Ray will auto-configure unspecified fields such as SubnetId and KeyName. 52 | # For more documentation on available fields, see: 53 | # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances 54 | head_node: 55 | machineType: n1-standard-4 # n1-highcpu-16 56 | disks: 57 | - boot: true 58 | autoDelete: true 59 | type: PERSISTENT 60 | initializeParams: 61 | diskSizeGb: 1000 62 | # See https://cloud.google.com/compute/docs/images for more images 63 | sourceImage: projects/visualmpc-210823/global/images/robonet-image-newray 64 | 65 | # Additional options can be found in in the compute docs at 66 | # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert 67 | 68 | # Provider-specific config for worker nodes, e.g. instance type. By default 69 | # Ray will auto-configure unspecified fields such as SubnetId and KeyName. 70 | # For more documentation on available fields, see: 71 | # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances 72 | worker_nodes: 73 | machineType: n1-standard-16 # n1-highcpu-8 74 | disks: 75 | - boot: true 76 | autoDelete: true 77 | type: PERSISTENT 78 | initializeParams: 79 | diskSizeGb: 1000 80 | # See https://cloud.google.com/compute/docs/images for more images 81 | sourceImage: projects/visualmpc-210823/global/images/robonet-image-newray 82 | # workers have p100 83 | guestAccelerators: 84 | - acceleratorType: projects/visualmpc-210823/zones/us-central1-a/acceleratorTypes/nvidia-tesla-v100 85 | acceleratorCount: 2 86 | # Run workers on preemtible instance by default. 87 | # Note that GCP preemptible instances automatically shut down after 24h. 88 | # Comment this out to use on-demand. 89 | scheduling: 90 | - preemptible: true 91 | - onHostMaintenance: TERMINATE 92 | 93 | # Additional options can be found in in the compute docs at 94 | # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert 95 | 96 | # Files or directories to copy to the head and worker nodes. The format is a 97 | # dictionary from REMOTE_PATH: LOCAL_PATH, e.g. 98 | file_mounts: {} 99 | 100 | # List of shell commands to run to set up nodes. 101 | setup_commands: 102 | - >- 103 | pip install cryptography 104 | && pip install --upgrade google-api-python-client 105 | && rm -rf ~/ray_results 106 | && cd ~/Documents/RoboNet 107 | && git stash 108 | && git pull origin inverse_model 109 | 110 | 111 | # Custom commands that will be run on the head node after common setup. 
112 | head_setup_commands: [] 113 | 114 | # Custom commands that will be run on worker nodes after common setup. 115 | worker_setup_commands: [] 116 | 117 | # Command to start ray on the head node. You don't need to change this. 118 | head_start_ray_commands: 119 | - ray stop 120 | - >- 121 | ray start 122 | --head 123 | --redis-port=6379 124 | --object-manager-port=8076 125 | --autoscaling-config=~/ray_bootstrap_config.yaml 126 | --internal-config={\"initial_reconstruction_timeout_milliseconds\":2000\,\"num_heartbeats_timeout\":100} 127 | 128 | # Command to start ray on worker nodes. You don't need to change this. 129 | worker_start_ray_commands: 130 | - ray stop 131 | - >- 132 | ray start 133 | --redis-address=$RAY_HEAD_IP:6379 134 | --object-manager-port=8076 135 | -------------------------------------------------------------------------------- /launch_configs/ray_local_cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster_name: default 2 | min_workers: 1 3 | max_workers: 4 4 | docker: 5 | image: "" 6 | container_name: "" 7 | target_utilization_fraction: 0.8 8 | idle_timeout_minutes: 5 9 | provider: 10 | type: local 11 | head_ip: deepthought 12 | worker_ips: [newton5] 13 | auth: 14 | ssh_user: sudeep 15 | ssh_private_key: ~/.ssh/id_rsa 16 | head_node: {} 17 | worker_nodes: {} 18 | file_mounts: {} 19 | head_setup_commands: [] 20 | worker_setup_commands: [] 21 | initialization_commands: [] 22 | setup_commands: 23 | - source ~/rayrc && cd ~/Documents/RoboNet && git checkout ray && git pull origin ray 24 | # - source activate ray && cd ray/python && pip install -e . 25 | head_start_ray_commands: 26 | - source ~/rayrc && ray stop 27 | - source ~/rayrc && ulimit -c unlimited && ray start --head --redis-port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml 28 | worker_start_ray_commands: 29 | - source ~/rayrc && ray stop 30 | - source ~/rayrc && ray start --redis-address=$RAY_HEAD_IP:6379 31 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu==1.14 2 | opencv-python 3 | scipy 4 | scikit-image 5 | h5py 6 | imageio-ffmpeg 7 | pandas 8 | tqdm 9 | requests 10 | ray 11 | -------------------------------------------------------------------------------- /robonet/README: -------------------------------------------------------------------------------- 1 | #for setup install 2 | sudo apt-get install ffmpeg 3 | -------------------------------------------------------------------------------- /robonet/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from robonet.video_prediction.training import GIFLogger 3 | from robonet.video_prediction.training import get_trainable as vpred_trainable 4 | from robonet.inverse_model.training import get_trainable as inverse_trainable 5 | except: 6 | print('could not import trainables!') 7 | 8 | 9 | def get_trainable(class_name): 10 | available_trainables = [vpred_trainable, inverse_trainable] 11 | for a in available_trainables: 12 | try: 13 | return a(class_name) 14 | except NotImplementedError: 15 | pass 16 | raise NotImplementedError 17 | 18 | -------------------------------------------------------------------------------- /robonet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .util.metadata_helper import load_metadata 2 | 3 | 4 | def 
get_dataset_class(name): 5 | if name == 'RoboNet': 6 | from .robonet_dataset import RoboNetDataset 7 | return RoboNetDataset 8 | elif name == 'AnnotatedRoboNet': 9 | from .variants.annotation_benchmark_dataset import AnnotationBenchmarkDataset 10 | return AnnotationBenchmarkDataset 11 | elif name == 'AnnotationHeldoutRobotDataset': 12 | from .variants.val_filter_dataset_variants import AnnotationHeldoutRobotDataset 13 | return AnnotationHeldoutRobotDataset 14 | elif name == 'HeldoutRobotDataset': 15 | from .variants.val_filter_dataset_variants import HeldoutRobotDataset 16 | return HeldoutRobotDataset 17 | elif name == 'TPU' or name == 'TFRecords': 18 | from .record_dataset import TFRecordVideoDataset 19 | return TFRecordVideoDataset 20 | else: 21 | raise NotImplementedError 22 | -------------------------------------------------------------------------------- /robonet/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | from tensorflow.contrib.training import HParams 4 | import glob 5 | import copy 6 | from .util.metadata_helper import load_metadata, MetaDataContainer 7 | import random 8 | import numpy as np 9 | 10 | 11 | class BaseVideoDataset(object): 12 | def __init__(self, batch_size, dataset_files_or_metadata, hparams=dict()): 13 | assert isinstance(batch_size, int), "batch_size must be an integer" 14 | self._batch_size = batch_size 15 | 16 | if isinstance(dataset_files_or_metadata, str): 17 | self._metadata = [load_metadata(dataset_files_or_metadata)] 18 | elif isinstance(dataset_files_or_metadata, MetaDataContainer): 19 | self._metadata = [dataset_files_or_metadata] 20 | elif isinstance(dataset_files_or_metadata, (list, tuple)): 21 | self._metadata = [] 22 | for d in dataset_files_or_metadata: 23 | assert isinstance(d, (str, MetaDataContainer)), "potential dataset must be folder containing files or meta-data instance" 24 | if isinstance(d, str): 25 | self._metadata.append(load_metadata(d)) 26 | else: 27 | self._metadata.append(d) 28 | 29 | # initialize hparams and store metadata_frame 30 | self._hparams = self._get_default_hparams().override_from_dict(hparams) 31 | 32 | self._init_rng() 33 | 34 | #initialize dataset 35 | self._num_ex_per_epoch = self._init_dataset() 36 | print('loaded {} train files'.format(self._num_ex_per_epoch)) 37 | 38 | def _init_dataset(self): 39 | return 0 40 | 41 | def _init_rng(self): 42 | # if RNG is not supplied then initialize new RNG 43 | self._random_generator = {} 44 | 45 | seeds = [None for _ in range(len(self.modes) + 1)] 46 | if self._hparams.RNG: 47 | seeds = [i + self._hparams.RNG for i in range(len(seeds))] 48 | 49 | for k, seed in zip(self.modes + ['base'], seeds): 50 | if k == 'train' and self._hparams.use_random_train_seed: 51 | seed = None 52 | self._random_generator[k] = random.Random(seed) 53 | self._np_rng = np.random.RandomState(self._random_generator['base'].getrandbits(32)) 54 | 55 | def _get(self, key, mode): 56 | raise NotImplementedError 57 | 58 | @staticmethod 59 | def _get_default_hparams(): 60 | default_dict = { 61 | 'RNG': 11381294392481135266, 62 | 'use_random_train_seed': False 63 | } 64 | return HParams(**default_dict) 65 | 66 | def get(self, key, mode='train'): 67 | if mode not in self.modes: 68 | raise ValueError('Mode {} not valid! 
Dataset has following modes: {}'.format(mode, self.modes)) 69 | return self._get(key, mode) 70 | 71 | def __getitem__(self, item): 72 | if isinstance(item, tuple): 73 | if len(item) != 2: 74 | raise KeyError('Index should be in format: [Key, Mode] or [Key] (assumes default train mode)') 75 | key, mode = item 76 | return self.get(key, mode) 77 | 78 | return self.get(item) 79 | 80 | def __contains__(self, item): 81 | raise NotImplementedError 82 | 83 | @property 84 | def batch_size(self): 85 | return self._batch_size 86 | 87 | @property 88 | def hparams(self): 89 | return copy.deepcopy(self._hparams) 90 | 91 | @property 92 | def num_examples_per_epoch(self): 93 | return self._num_ex_per_epoch 94 | 95 | @property 96 | def modes(self): 97 | return ['train', 'val', 'test'] 98 | 99 | @property 100 | def primary_mode(self): 101 | return 'train' 102 | 103 | def build_feed_dict(self, mode): 104 | return {} 105 | -------------------------------------------------------------------------------- /robonet/datasets/record_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | from tensorflow.contrib.training import HParams 4 | import glob 5 | from robonet.datasets.base_dataset import BaseVideoDataset 6 | import random 7 | import functools 8 | import json 9 | from robonet.datasets.util.dataset_utils import color_augment 10 | 11 | 12 | class TFRecordVideoDataset(BaseVideoDataset): 13 | def __init__(self, dataset_batches, dataset_paths, hparams=dict()): 14 | self._hparams = self._get_default_hparams().override_from_dict(hparams) # initialize hparams and store metadata_frame 15 | self._init_rng() # init rng objects 16 | 17 | assert isinstance(dataset_batches, (list, tuple)), "dataset_batches must be a list of batch_sizes per source" 18 | assert isinstance(dataset_paths, (list, tuple)), "dataset_batches must be a list of paths per source" 19 | self._batch_size = sum(dataset_batches) 20 | 21 | self._source_batch_sizes = dataset_batches 22 | self._source_dataset_paths = dataset_paths 23 | 24 | self._init_dataset() 25 | 26 | def _init_dataset(self): 27 | self._mode_datasets = {} 28 | for m in self.modes: 29 | self._mode_datasets[m] = [] 30 | 31 | for batch_size, dataset_path in zip(self._source_batch_sizes, self._source_dataset_paths): 32 | assert batch_size > 0 33 | assert 0 < self._hparams.train_frac < 1 34 | assert self._hparams.load_T > 1 35 | 36 | dataset_metadata = json.load(open('{}/format.json'.format(dataset_path), 'r')) 37 | 38 | if self._hparams.bucket_dir: 39 | print('loading files from: {}'.format(dataset_path + '/files.json')) 40 | all_files = json.load(open(dataset_path + '/files.json')) 41 | all_files = ['{}/{}'.format(self._hparams.bucket_dir, f) for f in all_files] 42 | else: 43 | all_files = glob.glob('{}/*.tfrecord'.format(dataset_path)) 44 | all_files.sort(key=lambda x: x.split('/')[-1]) 45 | 46 | self._random_generator['base'].shuffle(all_files) 47 | pivot = max(int(len(all_files) * self._hparams.train_frac), 1) 48 | train_f, val_f = all_files[:pivot], all_files[pivot:] 49 | 50 | self._random_generator['val'].shuffle(val_f) 51 | self._random_generator['train'].shuffle(train_f) 52 | 53 | for m, files in zip(self.modes, [train_f, val_f]): 54 | outputs = self._build_dataset(files, m, dataset_metadata, batch_size) 55 | 56 | # enforces static shapes constraint 57 | height, width = dataset_metadata['img_dim'] 58 | outputs['images'] = tf.cast(tf.reshape(outputs['images'], [batch_size, self._hparams.load_T, height, 
width, 3]), tf.float32) / 255 59 | if self._hparams.color_augmentation: 60 | outputs['images'] = color_augment(outputs['images'], self._hparams.color_augmentation) 61 | outputs['actions'] = tf.reshape(outputs['actions'], [batch_size, self._hparams.load_T - 1, dataset_metadata['adim']]) 62 | outputs['states'] = tf.reshape(outputs['states'], [batch_size, self._hparams.load_T, dataset_metadata['sdim']]) 63 | 64 | self._mode_datasets[m].append(outputs) 65 | 66 | for m in self.modes: 67 | tensor_list = self._mode_datasets.pop(m) 68 | self._mode_datasets[m] = {} 69 | for key in ['images', 'states', 'actions']: 70 | self._mode_datasets[m][key] = tf.concat([out_dict[key] for out_dict in tensor_list], axis=0) 71 | 72 | def _build_dataset(self, files, mode, dataset_metadata, batch_size): 73 | dataset = tf.data.Dataset.from_tensor_slices(files) 74 | if mode == 'train': 75 | dataset = dataset.repeat(self._hparams.n_epochs) 76 | else: 77 | dataset = dataset.repeat(None) # always have infinite val records 78 | 79 | ignore_order = tf.data.Options() 80 | ignore_order.experimental_deterministic = False 81 | dataset = dataset.with_options(ignore_order) 82 | dataset = dataset.interleave(tf.data.TFRecordDataset, 83 | cycle_length=min(len(files), 32), 84 | num_parallel_calls=tf.data.experimental.AUTOTUNE) 85 | 86 | parse_fn = functools.partial(self._parse_records, metadata=dataset_metadata) 87 | dataset = dataset.map(parse_fn) 88 | dataset = dataset.shuffle(buffer_size=self._hparams.shuffle_buffer) 89 | dataset = dataset.batch(batch_size, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE) 90 | outputs = dataset.make_one_shot_iterator().get_next() 91 | return outputs 92 | 93 | def _parse_records(self, serialized_example, metadata): 94 | feat_names = {} 95 | feat_names['images'] = tf.FixedLenFeature([], tf.string) 96 | feat_names['actions'] = tf.FixedLenFeature([(metadata['T'] - 1) * metadata['adim']], tf.float32) 97 | feat_names['states'] = tf.FixedLenFeature([metadata['T'] * metadata['sdim']], tf.float32) 98 | 99 | feature = tf.parse_single_example(serialized_example, features=feat_names) 100 | 101 | rand_start = tf.random.uniform((), 0, metadata['T'] - self._hparams.load_T, dtype=tf.int32) 102 | rand_cam = tf.random.uniform((), 0, metadata['ncam'], dtype=tf.int32) 103 | 104 | decoded_feat = {} 105 | height, width = metadata['img_dim'] 106 | 107 | vid_decode = tf.reshape(tf.image.decode_jpeg(feature['images'], channels=3), (metadata['T'] * metadata['ncam'] * height, width, 3)) 108 | decoded_feat['images'] = tf.reshape(vid_decode, [metadata['T'], metadata['ncam'], height, width, 3])[rand_start:rand_start+self._hparams.load_T, rand_cam] 109 | decoded_feat['actions'] = tf.reshape(feature['actions'], [metadata['T'] - 1, metadata['adim']])[rand_start:rand_start+self._hparams.load_T - 1] 110 | decoded_feat['states'] = tf.reshape(feature['states'], [metadata['T'], metadata['sdim']])[rand_start:rand_start+self._hparams.load_T] 111 | 112 | return decoded_feat 113 | 114 | def _get(self, key, mode): 115 | return self._mode_datasets[mode][key] 116 | 117 | @staticmethod 118 | def _get_default_hparams(): 119 | default_dict = { 120 | 'RNG': 11381294392481135266, 121 | 'use_random_train_seed': False, 122 | 'shuffle_buffer': 500, 123 | 'n_epochs': None, 124 | 'buffer_size': 10, 125 | 'train_frac': 0.9, # train, val 126 | 'load_T': 15, 127 | 'bucket_dir': '', 128 | 'color_augmentation': 0 129 | } 130 | return HParams(**default_dict) 131 | 132 | def __contains__(self, item): 133 | return item in ['images', 'actions', 
'states'] 134 | 135 | @property 136 | def modes(self): 137 | return ['train', 'val'] 138 | 139 | @property 140 | def num_examples_per_epoch(self): 141 | raise NotImplementedError 142 | 143 | 144 | if __name__ == '__main__': 145 | import argparse 146 | import imageio 147 | import numpy as np 148 | import time 149 | 150 | 151 | parser = argparse.ArgumentParser(description="tfrecord dataset tester") 152 | parser.add_argument('--path', type=str, required=True, help='path to tfrecord files') 153 | parser.add_argument('--batch_size', type=int, default=10, help='batch size for loaded data') 154 | args = parser.parse_args() 155 | 156 | loader = TFRecordVideoDataset([args.batch_size], [args.path], {'train_frac': 0.5, 'shuffle_buffer': 10}) 157 | print(loader['images'], loader['actions'], loader['states']) 158 | s = tf.Session() 159 | for j in range(10): 160 | t = time.time() 161 | img, act, state = s.run([loader['images'], loader['actions'], loader['states']]) 162 | print(time.time() - t) 163 | print('actions', act) 164 | print('state', state) 165 | 166 | w = imageio.get_writer('./out{}.gif'.format(j)) 167 | for t in range(img.shape[1]): 168 | w.append_data((np.concatenate(img[:, t], axis=-2) * 255).astype(np.uint8)) 169 | 170 | import pdb; pdb.set_trace() 171 | print(img.shape) 172 | -------------------------------------------------------------------------------- /robonet/datasets/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SudeepDasari/RoboNet/d83eee20f39653c3f8e7c349df7350e8a9e9f7a7/robonet/datasets/util/__init__.py -------------------------------------------------------------------------------- /robonet/datasets/util/convert_all.sh: -------------------------------------------------------------------------------- 1 | python robonet/datasets/util/hdf5_2_records.py ~/hdf5 --robot sawyer --save_dir records_all_small/sawyer --n_workers 40; 2 | python robonet/datasets/util/hdf5_2_records.py ~/hdf5 --robot kuka --save_dir records_all_small/kuka --n_workers 40; 3 | python robonet/datasets/util/hdf5_2_records.py ~/hdf5 --robot R3 --save_dir records_all_small/R3 --n_workers 40; 4 | python robonet/datasets/util/hdf5_2_records.py ~/hdf5 --robot widowx --save_dir records_all_small/widowx --n_workers 40; 5 | python robonet/datasets/util/hdf5_2_records.py ~/hdf5 --robot baxter --save_dir records_all_small/baxter --n_workers 40; 6 | python robonet/datasets/util/hdf5_2_records.py ~/hdf5 --robot fetch --save_dir records_all_small/fetch --n_workers 40; 7 | python robonet/datasets/util/hdf5_2_records.py ~/hdf5 --robot franka --save_dir records_all_small/franka --n_workers 40; 8 | -------------------------------------------------------------------------------- /robonet/datasets/util/dataset_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import pdb 3 | import numpy as np 4 | 5 | 6 | def color_augment(image, noise_range=0.2): 7 | assert noise_range > 0, "noise_range must be positive" 8 | 9 | bs = image.get_shape().as_list()[0] 10 | shape = [bs] + [1 for _ in range(len(image.get_shape().as_list()) - 1)] 11 | min_noise = -noise_range 12 | max_noise = noise_range 13 | rand_h = tf.random_uniform(shape, minval=min_noise, maxval=max_noise) 14 | rand_s = tf.random_uniform(shape, minval=min_noise, maxval=max_noise) 15 | rand_v = tf.random_uniform(shape, minval=min_noise, maxval=max_noise) 16 | image_hsv = tf.image.rgb_to_hsv(image) 17 | h_, s_, v_ = 
tf.split(image_hsv, 3, -1) 18 | stack_mod = tf.clip_by_value(tf.concat([h_ + rand_h, s_ + rand_s, v_ + rand_v], axis=-1), 0, 1.) 19 | image_rgb = tf.image.hsv_to_rgb(stack_mod) 20 | return image_rgb 21 | 22 | 23 | def split_train_val_test(metadata, splits=None, train_ex=None, rng=None): 24 | assert (splits is None) != (train_ex is None), "exactly one of splits or train_ex should be supplied" 25 | files = metadata.get_shuffled_files(rng) 26 | train_files, val_files, test_files = None, None, None 27 | 28 | if splits is not None: 29 | assert len(splits) == 3, "function requires 3 split parameteres ordered (train, val ,test)" 30 | splits = np.cumsum([int(i * len(files)) for i in splits]).tolist() 31 | else: 32 | assert len(files) >= train_ex, "not enough files for train examples!" 33 | val_split = int(0.5 * (len(files) + train_ex)) 34 | splits = [train_ex, val_split, len(files)] 35 | 36 | # give extra fat to val set 37 | if splits[-1] < len(files): 38 | diff = len(files) - splits[-1] 39 | for i in range(1, len(splits)): 40 | splits[i] += diff 41 | 42 | if splits[0]: 43 | train_files = files[:splits[0]] 44 | if splits[1]: 45 | val_files = files[splits[0]: splits[1]] 46 | if splits[2]: 47 | test_files = files[splits[1]: splits[2]] 48 | 49 | return train_files, val_files, test_files 50 | -------------------------------------------------------------------------------- /robonet/datasets/util/hdf5_2_records.py: -------------------------------------------------------------------------------- 1 | """ 2 | Converts data from hdf5 format to TFRecord format 3 | """ 4 | 5 | import tensorflow as tf 6 | from robonet.datasets.util.hdf5_loader import load_data, default_loader_hparams 7 | from tqdm import tqdm 8 | import cv2 9 | 10 | 11 | def float_feature(value): 12 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 13 | 14 | 15 | def bytes_feature(value): 16 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 17 | 18 | 19 | def save_record(filename, trajs): 20 | writer = tf.python_io.TFRecordWriter(filename) 21 | for traj in tqdm(trajs): 22 | images, actions, states = traj 23 | image_bytes = cv2.imencode('.jpg', images.reshape((-1, images.shape[-2], images.shape[-1]))[:, :, ::-1])[1].tostring() 24 | 25 | feature = {} 26 | feature['images'] = bytes_feature(image_bytes) 27 | feature['actions'] = float_feature(actions.flatten().tolist()) 28 | feature['states'] = float_feature(states.flatten().tolist()) 29 | example = tf.train.Example(features=tf.train.Features(feature=feature)) 30 | writer.write(example.SerializeToString()) 31 | writer.close() 32 | 33 | 34 | def _load_hdf5(inputs): 35 | if len(inputs) == 3: 36 | f_name, file_metadata, hparams = inputs 37 | return load_data(f_name, file_metadata, hparams) 38 | elif len(inputs) == 4: 39 | f_name, file_metadata, hparams, rng = inputs 40 | return load_data(f_name, file_metadata, hparams, rng) 41 | raise ValueError 42 | 43 | 44 | if __name__ == '__main__': 45 | import argparse 46 | from robonet.datasets import load_metadata 47 | from tensorflow.contrib.training import HParams 48 | import multiprocessing 49 | import json 50 | import copy 51 | import random 52 | import os 53 | 54 | 55 | parser = argparse.ArgumentParser(description="converts data into tfrecord format for fast TPU loading") 56 | parser.add_argument('path', type=str, default='./', help='path to input file archive') 57 | parser.add_argument('--robot', type=str, default='', help='if flag supplied only converts data corresponding to given robot') 58 | 
parser.add_argument('--filter_primitive', type=str, default='', help='if flag supplied only converts data with given primitive') 59 | parser.add_argument('--n_workers', type=int, default=1, help='number of worker threads') 60 | parser.add_argument('--target_adim', type=int, default=5, help='target action dimension for loading') 61 | parser.add_argument('--target_sdim', type=int, default=5, help='target state dimension for loading') 62 | parser.add_argument('--img_dims', type=int, nargs='+', default=[48, 64], help='(height, width) to resize images') 63 | parser.add_argument('--save_dir', type=str, default='./', help='where to save records') 64 | parser.add_argument('--ex_per_record', type=int, default=512, help='examples per record file') 65 | args = parser.parse_args() 66 | 67 | name_dir = 'record_names/' + '/'.join(args.save_dir.split('/')[1:]) 68 | if not os.path.exists(args.save_dir): 69 | os.makedirs(args.save_dir) 70 | if not os.path.exists(name_dir): 71 | os.makedirs(name_dir) 72 | 73 | metadata = load_metadata(args.path) 74 | if args.robot: 75 | metadata = metadata[metadata['robot'] == args.robot] 76 | if args.filter_primitive: 77 | metadata = metadata[metadata['primitives'] == args.filter_primitive] 78 | 79 | ncam = min(metadata['ncam'].frame.unique().tolist()) 80 | print('loaded {} records with robot={} and primitive={}'.format(len(metadata), args.robot, args.filter_primitive)) 81 | 82 | hparams = HParams(**default_loader_hparams()) 83 | hparams.target_adim = args.target_adim 84 | hparams.target_sdim = args.target_sdim 85 | hparams.action_mismatch = 3 86 | hparams.state_mismatch = 3 87 | hparams.cams_to_load = list(range(ncam)) 88 | hparams.load_T = min(min(metadata['state_T']),min(metadata['img_T'])).frame 89 | assert len(args.img_dims) == 2, "should be (height, width) tuple" 90 | hparams.img_size = tuple(args.img_dims) 91 | 92 | print('saving images with adim-{}, sdim-{}, img_dims-{}, T-{}'.format(hparams.target_adim, hparams.target_sdim, hparams.img_size, hparams.load_T)) 93 | 94 | record_metadata = {'adim': int(hparams.target_adim), 'sdim': int(hparams.target_sdim), 'img_dim': list(hparams.img_size), 'T': int(hparams.load_T) , 'ncam': ncam} 95 | json.dump(record_metadata, open('{}/format.json'.format(args.save_dir), 'w')) 96 | json.dump(record_metadata, open('{}/format.json'.format(name_dir), 'w')) 97 | pool = multiprocessing.Pool(args.n_workers) 98 | 99 | all_files = metadata.files 100 | random.shuffle(all_files) 101 | f_ind, r_cntr = 0, 0 102 | f_names = [] 103 | while f_ind < len(all_files): 104 | f_load = all_files[f_ind:f_ind + args.ex_per_record] 105 | fm_load = [metadata.get_file_metadata(f) for f in f_load] 106 | f_hparams = [copy.deepcopy(hparams) for _ in f_load] 107 | 108 | loaded_data = pool.map(_load_hdf5, [(f, fm, fh) for f, fm, fh in zip(f_load, fm_load, f_hparams)]) 109 | f_name = '{}/record{}.tfrecord'.format(args.save_dir, r_cntr) 110 | save_record(f_name, loaded_data) 111 | print('saved record{}.tfrecord'.format(r_cntr)) 112 | f_names.append(f_name) 113 | 114 | r_cntr += 1 115 | f_ind += len(loaded_data) 116 | 117 | json.dump(f_names, open('{}/files.json'.format(args.save_dir), 'w')) 118 | json.dump(f_names, open('{}/files.json'.format(name_dir), 'w')) 119 | -------------------------------------------------------------------------------- /robonet/datasets/util/hdf5_loader.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import cv2 3 | import pdb 4 | import imageio 5 | import io 6 | import hashlib 7 | 
import numpy as np 8 | import os 9 | import random 10 | 11 | 12 | class ACTION_MISMATCH: 13 | ERROR = 0 14 | PAD_ZERO = 1 15 | CLEAVE = 2 16 | 17 | 18 | class STATE_MISMATCH: 19 | ERROR = 0 20 | PAD_ZERO = 1 21 | CLEAVE = 2 22 | 23 | 24 | def default_loader_hparams(): 25 | return { 26 | 'target_adim': 4, 27 | 'target_sdim': 5, 28 | 'state_mismatch': STATE_MISMATCH.ERROR, # TODO make better flag parsing 29 | 'action_mismatch': ACTION_MISMATCH.ERROR, # TODO make better flag parsing 30 | 'img_size': [48, 64], 31 | 'cams_to_load': [0], 32 | 'impute_autograsp_action': True, 33 | 'load_annotations': False, 34 | 'zero_if_missing_annotation': False, 35 | 'load_T': 0 # TODO implement error checking here for jagged reading 36 | } 37 | 38 | 39 | def load_camera_imgs(cam_index, file_pointer, file_metadata, target_dims, start_time=0, n_load=None): 40 | cam_group = file_pointer['env']['cam{}_video'.format(cam_index)] 41 | old_dims = file_metadata['frame_dim'] 42 | length = file_metadata['img_T'] 43 | encoding = file_metadata['img_encoding'] 44 | image_format = file_metadata['image_format'] 45 | 46 | if n_load is None: 47 | n_load = length 48 | 49 | old_height, old_width = old_dims 50 | target_height, target_width = target_dims 51 | resize_method = cv2.INTER_CUBIC 52 | if target_height * target_width < old_height * old_width: 53 | resize_method = cv2.INTER_AREA 54 | 55 | images = np.zeros((n_load, target_height, target_width, 3), dtype=np.uint8) 56 | if encoding == 'mp4': 57 | buf = io.BytesIO(cam_group['frames'][:].tostring()) 58 | img_buffer = [img for t, img in enumerate(imageio.get_reader(buf, format='mp4')) if start_time <= t < n_load + start_time] 59 | elif encoding == 'jpg': 60 | img_buffer = [cv2.imdecode(cam_group['frame{}'.format(t)][:], cv2.IMREAD_COLOR)[:, :, ::-1] 61 | for t in range(start_time, start_time + n_load)] 62 | else: 63 | raise ValueError("encoding not supported") 64 | 65 | for t, img in enumerate(img_buffer): 66 | if (old_height, old_width) == (target_height, target_width): 67 | images[t] = img 68 | else: 69 | images[t] = cv2.resize(img, (target_width, target_height), interpolation=resize_method) 70 | 71 | if image_format == 'RGB': 72 | return images 73 | elif image_format == 'BGR': 74 | return images[:, :, :, ::-1] 75 | raise NotImplementedError 76 | 77 | 78 | def load_states(file_pointer, meta_data, hparams): 79 | s_T, sdim = meta_data['state_T'], meta_data['sdim'] 80 | if hparams.target_sdim == sdim: 81 | return file_pointer['env']['state'][:] 82 | 83 | elif sdim < hparams.target_sdim and hparams.state_mismatch & STATE_MISMATCH.PAD_ZERO: 84 | pad = np.zeros((s_T, hparams.target_sdim - sdim), dtype=np.float32) 85 | return np.concatenate((file_pointer['env']['state'][:], pad), axis=-1) 86 | 87 | elif sdim > hparams.target_sdim and hparams.state_mismatch & STATE_MISMATCH.CLEAVE: 88 | return file_pointer['env']['state'][:][:, :hparams.target_sdim] 89 | 90 | else: 91 | raise ValueError("file sdim - {}, target sdim - {}, pad behavior - {}".format(sdim, hparams.target_sdim, hparams.state_mismatch)) 92 | 93 | 94 | def load_actions(file_pointer, meta_data, hparams): 95 | a_T, adim = meta_data['action_T'], meta_data['adim'] 96 | if hparams.target_adim == adim: 97 | return file_pointer['policy']['actions'][:] 98 | 99 | elif hparams.target_adim == adim + 1 and hparams.impute_autograsp_action and meta_data['primitives'] == 'autograsp': 100 | action_append, old_actions = np.zeros((a_T, 1)), file_pointer['policy']['actions'][:] 101 | next_state = file_pointer['env']['state'][:][1:, -1] 102 | 
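# this autograsp trajectory stores one fewer action dimension than requested; impute the gripper
# command by thresholding the next-step gripper state against the midpoint of its low/high bounds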
103 | high_val, low_val = meta_data['high_bound'][-1], meta_data['low_bound'][-1] 104 | midpoint = (high_val + low_val) / 2.0 105 | 106 | for t, s in enumerate(next_state): 107 | if s > midpoint: 108 | action_append[t, 0] = high_val 109 | else: 110 | action_append[t, 0] = low_val 111 | return np.concatenate((old_actions, action_append), axis=-1) 112 | 113 | elif adim < hparams.target_adim and hparams.action_mismatch & ACTION_MISMATCH.PAD_ZERO: 114 | pad = np.zeros((a_T, hparams.target_adim - adim), dtype=np.float32) 115 | return np.concatenate((file_pointer['policy']['actions'][:], pad), axis=-1) 116 | 117 | elif adim > hparams.target_adim and hparams.action_mismatch & ACTION_MISMATCH.CLEAVE: 118 | return file_pointer['policy']['actions'][:][:, :hparams.target_adim] 119 | 120 | else: 121 | raise ValueError("file adim - {}, target adim - {}, pad behavior - {}".format(adim, hparams.target_adim, hparams.action_mismatch)) 122 | 123 | 124 | def load_annotations(file_pointer, metadata, hparams, cams_to_load): 125 | old_height, old_width = metadata['frame_dim'] 126 | target_height, target_width = hparams.img_size 127 | scale_height, scale_width = target_height / float(old_height), target_width / float(old_width) 128 | annot = np.zeros((metadata['img_T'], len(cams_to_load), target_height, target_width, 2), dtype=np.float32) 129 | if metadata.get('contains_annotation', False) != True and hparams.zero_if_missing_annotation: 130 | return annot 131 | 132 | assert metadata['contains_annotation'], "no annotations to load!" 133 | point_mat = file_pointer['env']['bbox_annotations'][:].astype(np.int32) 134 | 135 | for t in range(metadata['img_T']): 136 | for n, chosen_cam in enumerate(cams_to_load): 137 | for obj in range(point_mat.shape[2]): 138 | h1, w1 = point_mat[t, chosen_cam, obj, 0] * [scale_height, scale_width] - 1 139 | h2, w2 = point_mat[t, chosen_cam, obj, 1] * [scale_height, scale_width] - 1 140 | h, w = int((h1 + h2) / 2), int((w1 + w2) / 2) 141 | annot[t, n, h, w, obj] = 1 142 | return annot 143 | 144 | 145 | def load_data(f_name, file_metadata, hparams, rng=None): 146 | rng = random.Random(rng) 147 | 148 | assert os.path.exists(f_name) and os.path.isfile(f_name), "invalid f_name" 149 | with open(f_name, 'rb') as f: 150 | buf = f.read() 151 | assert hashlib.sha256(buf).hexdigest() == file_metadata['sha256'], "file hash doesn't match meta-data. maybe delete pkl and re-generate?" 152 | 153 | with h5py.File(io.BytesIO(buf)) as hf: 154 | start_time, n_states = 0, min([file_metadata['state_T'], file_metadata['img_T'], file_metadata['action_T'] + 1]) 155 | assert n_states > 1, "must be more than one state in loaded tensor!" 156 | if 1 < hparams.load_T < n_states: 157 | start_time = rng.randint(0, n_states - hparams.load_T) 158 | n_states = hparams.load_T 159 | 160 | assert all([0 <= i < file_metadata['ncam'] for i in hparams.cams_to_load]), "cams_to_load out of bounds!" 
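# load each requested camera stream, then swap axes so time comes first: (T, n_cams, height, width, 3)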
161 | images, selected_cams = [], [] 162 | for cam_index in hparams.cams_to_load: 163 | images.append(load_camera_imgs(cam_index, hf, file_metadata, hparams.img_size, start_time, n_states)[None]) 164 | selected_cams.append(cam_index) 165 | images = np.swapaxes(np.concatenate(images, 0), 0, 1) 166 | 167 | actions = load_actions(hf, file_metadata, hparams).astype(np.float32)[start_time:start_time + n_states-1] 168 | states = load_states(hf, file_metadata, hparams).astype(np.float32)[start_time:start_time + n_states] 169 | 170 | if hparams.load_annotations: 171 | annotations = load_annotations(hf, file_metadata, hparams, selected_cams)[start_time:start_time + n_states] 172 | return images, actions, states, annotations 173 | 174 | return images, actions, states 175 | 176 | 177 | if __name__ == '__main__': 178 | import argparse 179 | import tensorflow as tf 180 | import robonet.datasets as datasets 181 | import random 182 | import matplotlib.pyplot as plt 183 | 184 | parser = argparse.ArgumentParser(description="tests hdf5 data loader without tensorflow dataset wrapper") 185 | parser.add_argument('file', type=str, help="path to hdf5 you want to load") 186 | parser.add_argument('--load_annotations', action='store_true', help="loads annotations if supplied") 187 | parser.add_argument('--load_steps', type=int, default=0, help="loads steps from the dataset instead of everything") 188 | args = parser.parse_args() 189 | 190 | assert 'hdf5' in args.file 191 | data_folder = '/'.join(args.file.split('/')[:-1]) 192 | meta_data = datasets.load_metadata(data_folder) 193 | 194 | hparams = tf.contrib.training.HParams(**default_loader_hparams()) 195 | hparams.load_T = args.load_steps 196 | if args.load_annotations: 197 | hparams.load_annotations = True 198 | print(meta_data[meta_data['contains_annotation'] == True]) 199 | meta_data = meta_data[meta_data['contains_annotation'] == True] 200 | imgs, actions, states, annot = load_data(args.file, meta_data.get_file_metadata(args.file), hparams) 201 | else: 202 | imgs, actions, states = load_data(args.file, meta_data.get_file_metadata(args.file), hparams) 203 | 204 | print('actions', actions.shape) 205 | print('states', states.shape) 206 | print('images', imgs.shape) 207 | 208 | if args.load_annotations: 209 | for o in range(2): 210 | w = imageio.get_writer('out{}.gif'.format(o)) 211 | for t, i in enumerate(imgs): 212 | dist_render = plt.cm.viridis(annot[t, :, :, o])[:, :, :3] 213 | w.append_data((i * dist_render).astype(np.uint8)) 214 | w.close() 215 | else: 216 | w = imageio.get_writer('out.gif') 217 | for i in imgs: 218 | w.append_data(i) 219 | w.close() 220 | 221 | -------------------------------------------------------------------------------- /robonet/datasets/util/metadata_helper.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import pandas as pd 3 | import numpy as np 4 | import glob 5 | import os 6 | from tqdm import tqdm 7 | from multiprocessing import Pool, cpu_count 8 | import hashlib 9 | import io 10 | import random 11 | 12 | 13 | class MetaDataContainer: 14 | def __init__(self, base_path, meta_data): 15 | self._meta_data = meta_data 16 | self._base_path = base_path 17 | 18 | def get_file_metadata(self, fname): 19 | fname = fname.split('/')[-1] 20 | return self._meta_data.loc[fname] 21 | 22 | def select_objects(self, obj_class_name): 23 | if isinstance(obj_class_name, str): 24 | return self._meta_data[[obj_class_name in x for x in self._meta_data['object_classes']]] 25 | return 
self._meta_data[[set(obj_class_name) == set(x) for x in self._meta_data['object_classes']]] 26 | 27 | @property 28 | def frame(self): 29 | return self._meta_data 30 | 31 | @property 32 | def files(self): 33 | return ['{}/{}'.format(self._base_path, f) for f in self.frame.index] 34 | 35 | def get_shuffled_files(self, rng=None): 36 | files = ['{}/{}'.format(self._base_path, f) for f in self.frame.index] 37 | if rng: 38 | rng.shuffle(files) 39 | else: 40 | random.shuffle(files) 41 | return files 42 | 43 | @property 44 | def base_path(self): 45 | return self._base_path 46 | 47 | def __getitem__(self, arg): 48 | return MetaDataContainer(self._base_path, self._meta_data[arg]) 49 | 50 | def __contains__(self, item): 51 | return item in self._meta_data 52 | 53 | def __repr__(self): 54 | return repr(self._meta_data) 55 | 56 | def __str__(self): 57 | return str(self._meta_data) 58 | 59 | def __eq__(self, other): 60 | return self._meta_data == other 61 | 62 | def __ne__(self, other): 63 | return self._meta_data != other 64 | 65 | def __lt__(self, other): 66 | return self._meta_data < other 67 | 68 | def __le__(self, other): 69 | return self._meta_data <= other 70 | 71 | def __gt__(self, other): 72 | return self._meta_data > other 73 | 74 | def __ge__(self, other): 75 | return self._meta_data >= other 76 | 77 | def keys(self): 78 | return self._meta_data.keys() 79 | 80 | def __len__(self): 81 | return len(self._meta_data) 82 | 83 | 84 | def load_metadata_dict(fname): 85 | if not os.path.exists(fname) or not os.path.isfile(fname): 86 | raise IOError("can't find {}".format(fname)) 87 | buf = open(fname, 'rb').read() 88 | 89 | with h5py.File(io.BytesIO(buf)) as hf: 90 | meta_data_dict = {'file_version': hf['file_version'][()]} 91 | 92 | meta_data_dict['sha256'] = hashlib.sha256(buf).hexdigest() 93 | meta_data_dict['sdim'] = hf['env']['state'].shape[1] 94 | meta_data_dict['state_T'] = hf['env']['state'].shape[0] 95 | 96 | meta_data_dict['adim'] = hf['policy']['actions'].shape[1] 97 | meta_data_dict['action_T'] =hf['policy']['actions'].shape[0] 98 | 99 | # assumes all cameras have same attributes (if they exist) 100 | n_cams = hf['env'].attrs.get('n_cams', 0) 101 | if n_cams: 102 | meta_data_dict['ncam'] = n_cams 103 | 104 | if hf['env'].attrs['cam_encoding'] == 'mp4': 105 | meta_data_dict['frame_dim'] = hf['env']['cam0_video']['frames'].attrs['shape'][:2] 106 | meta_data_dict['img_T'] = hf['env']['cam0_video']['frames'].attrs['T'] 107 | meta_data_dict['img_encoding'] = 'mp4' 108 | meta_data_dict['image_format'] = hf['env']['cam0_video']['frames'].attrs['image_format'] 109 | else: 110 | meta_data_dict['frame_dim'] = hf['env']['cam0_video']['frame0'].attrs['shape'][:2] 111 | meta_data_dict['image_format'] = hf['env']['cam0_video']['frame0'].attrs['image_format'] 112 | meta_data_dict['img_encoding'] = 'jpg' 113 | meta_data_dict['img_T'] = len(hf['env']['cam0_video']) 114 | 115 | # TODO: remove misc field and shift all to meta-data 116 | for k in hf['misc'].keys(): 117 | assert k not in meta_data_dict, "key {} already present!".format(k) 118 | meta_data_dict[k] = hf['misc'][k][()] 119 | 120 | 121 | for k in hf['metadata'].attrs.keys(): 122 | assert k not in meta_data_dict, "key {} already present!".format(k) 123 | meta_data_dict[k] = hf['metadata'].attrs[k] 124 | 125 | if 'low_bound' not in meta_data_dict and 'low_bound' in hf['env']: 126 | meta_data_dict['low_bound'] = hf['env']['low_bound'][0] 127 | 128 | if 'high_bound' not in meta_data_dict and 'high_bound' in hf['env']: 129 | meta_data_dict['high_bound'] 
= hf['env']['high_bound'][0] 130 | 131 | return meta_data_dict 132 | 133 | def get_metadata_frame(files): 134 | if isinstance(files, str): 135 | base_path = files 136 | files = sorted(glob.glob('{}/*.hdf5'.format(files))) 137 | if not files: 138 | raise ValueError('no hdf5 files found!') 139 | 140 | if os.path.exists('{}/meta_data.pkl'.format(base_path)): 141 | meta_data = pd.read_pickle('{}/meta_data.pkl'.format(base_path), compression='gzip') 142 | 143 | registered_fnames = set([f for f in meta_data.index]) 144 | loaded_fnames = set([f.split('/')[-1] for f in files]) 145 | 146 | if loaded_fnames == registered_fnames: 147 | return meta_data 148 | os.remove('{}/meta_data.pkl'.format(base_path)) 149 | print('regenerating meta_data file!') 150 | elif isinstance(files, (list, tuple)): 151 | base_path=None 152 | files = sorted(files) 153 | else: 154 | raise ValueError("Must be path to files or list/tuple of filenames") 155 | 156 | with Pool(cpu_count()) as p: 157 | meta_data = list(tqdm(p.imap(load_metadata_dict, files), total=len(files))) 158 | 159 | data_frame = pd.DataFrame(meta_data, index=[f.split('/')[-1] for f in files]) 160 | if base_path: 161 | data_frame.to_pickle("{}/meta_data.pkl".format(base_path), compression='gzip') 162 | return data_frame 163 | 164 | 165 | def load_metadata(files): 166 | base_path = files 167 | if isinstance(files, (tuple, list)): 168 | base_path = '' 169 | else: 170 | files = base_path = os.path.expanduser(base_path) 171 | 172 | return MetaDataContainer(base_path, get_metadata_frame(files)) 173 | 174 | 175 | if __name__ == '__main__': 176 | import argparse 177 | import pdb 178 | 179 | parser = argparse.ArgumentParser(description="calculates or loads meta_data frame") 180 | parser.add_argument('path', help='path to files containing hdf5 dataset') 181 | args = parser.parse_args() 182 | data_frame = load_metadata(args.path) 183 | pdb.set_trace() 184 | print('loaded frame') 185 | -------------------------------------------------------------------------------- /robonet/datasets/util/tensor_multiplexer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from collections import OrderedDict 3 | 4 | 5 | def multiplex_tensors(dataset, key_name, train_cond=None): 6 | if train_cond is None: 7 | _train_cond = tf.placeholder(tf.int32, shape=[], name="train_cond") 8 | else: 9 | _train_cond = train_cond 10 | 11 | tensors = [dataset[key_name, m] for m in dataset.modes] 12 | assert len(tensors), "can't multiplex across no modes!" 
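# chain tf.cond ops so the integer fed into the train_cond placeholder selects which mode's tensor (e.g. train/val/test) is evaluated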
13 | 14 | if len(tensors) == 1: 15 | if train_cond is None: 16 | return tensors[0], _train_cond 17 | return tensors[0] 18 | 19 | top_tensor = tensors[-1] 20 | for ind in range(len(tensors) - 1, 0, -1): 21 | top_tensor = tf.cond(_train_cond < ind, lambda: tensors[ind - 1], lambda: top_tensor) 22 | 23 | if train_cond is None: 24 | return top_tensor, _train_cond 25 | return top_tensor 26 | 27 | 28 | class MultiplexedTensors: 29 | def __init__(self, dataset, tensor_names): 30 | self._dataset = dataset 31 | self._mode_ind = {} 32 | for i, k in enumerate(dataset.modes): 33 | self._mode_ind[k] = i 34 | 35 | self._train_cond = tf.placeholder(tf.int32, shape=[], name="train_cond") 36 | self._tensor_dict = OrderedDict() 37 | for t in tensor_names: 38 | self._tensor_dict[t] = multiplex_tensors(dataset, t, self._train_cond) 39 | 40 | def __getitem__(self, key): 41 | return self._tensor_dict[key] 42 | 43 | @property 44 | def dict(self): 45 | return self._tensor_dict 46 | 47 | def get_feed_dict(self, mode): 48 | dataset_feed = self._dataset.build_feed_dict(mode) 49 | if isinstance(mode, int): 50 | assert 0 <= mode < len(self._mode_ind.keys()), "mode_index must be in range 0 to len(modes) - 1" 51 | dataset_feed[self._train_cond] = mode 52 | return dataset_feed 53 | 54 | assert isinstance(mode, str) 55 | assert mode in self._mode_ind, "{} not supported! Modes are {}".format(mode, self._mode_ind.keys()) 56 | 57 | dataset_feed[self._train_cond] = self._mode_ind[mode] 58 | return dataset_feed 59 | 60 | @property 61 | def modes(self): 62 | return list(self._mode_ind.keys()) 63 | -------------------------------------------------------------------------------- /robonet/datasets/variants/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SudeepDasari/RoboNet/d83eee20f39653c3f8e7c349df7350e8a9e9f7a7/robonet/datasets/variants/__init__.py -------------------------------------------------------------------------------- /robonet/datasets/variants/annotation_benchmark_dataset.py: -------------------------------------------------------------------------------- 1 | from robonet.datasets.robonet_dataset import RoboNetDataset 2 | from robonet.datasets.util.dataset_utils import split_train_val_test 3 | 4 | 5 | class AnnotationBenchmarkDataset(RoboNetDataset): 6 | """ 7 | Separates files that have annotations and those which don't 8 | - files with annotations are loaded as validation files 9 | - all others are loaded as train/test 10 | """ 11 | def __init__(self, batch_size, dataset_files_or_metadata, hparams=dict()): 12 | self._annotated_robots = None 13 | super(AnnotationBenchmarkDataset, self).__init__(batch_size, dataset_files_or_metadata, hparams) 14 | 15 | @staticmethod 16 | def _get_default_hparams(parent_hparams=None): 17 | if parent_hparams is None: 18 | parent_hparams = RoboNetDataset._get_default_hparams() 19 | parent_hparams.load_annotations = True 20 | parent_hparams.zero_if_missing_annotation = True 21 | return parent_hparams 22 | 23 | def _split_files(self, source_number, metadata): 24 | assert self._hparams.load_annotations, "mode requires annotation loading" 25 | assert self._hparams.zero_if_missing_annotation, "mode requires some files to not be annotated" 26 | 27 | non_annotated_metadata = metadata[metadata['contains_annotation'] != True] 28 | 29 | if self._hparams.train_ex_per_source != [-1]: 30 | train_files, val_files, test_files = split_train_val_test(metadata, train_ex=self._hparams.train_ex_per_source[source_number], 
rng=self._random_generator['base']) 31 | else: 32 | train_files, val_files, test_files = split_train_val_test(non_annotated_metadata, splits=self._hparams.splits, rng=self._random_generator['base']) 33 | 34 | all_annotated = metadata[metadata['contains_annotation'] == True] 35 | robot_files = [all_annotated[all_annotated['robot'] == r].files for r in self._annotated_robots] 36 | 37 | if len(self._annotated_robots) == 1: 38 | return [train_files, val_files, test_files] + robot_files 39 | return [train_files, val_files, test_files] + [all_annotated.files] + robot_files 40 | 41 | @property 42 | def modes(self): 43 | if self._annotated_robots is None: 44 | self._annotated_robots = [] 45 | for m in self._metadata: 46 | annotated_robots_from_source = m[m['contains_annotation'] == True]['robot'].frame.unique().tolist() 47 | self._annotated_robots.extend(annotated_robots_from_source) 48 | self._annotated_robots = list(set(self._annotated_robots)) 49 | 50 | all_annotated_mode = [] 51 | if len(self._annotated_robots) > 1: 52 | all_annotated_mode = ['all_annotated'] 53 | 54 | return ['train', 'val', 'test'] + all_annotated_mode + ['{}_annotated'.format(r) for r in self._annotated_robots] 55 | 56 | 57 | if __name__ == '__main__': 58 | import argparse 59 | import tensorflow as tf 60 | import numpy as np 61 | parser = argparse.ArgumentParser(description="calculates or loads meta_data frame") 62 | parser.add_argument('path', help='path to files containing hdf5 dataset') 63 | parser.add_argument('--batch_size', type=int, default=32, help='batch size for test loader (should be even for non-time test demo to work)') 64 | parser.add_argument('--mode', type=str, default='val', help='mode to grab data from') 65 | parser.add_argument('--load_steps', type=int, default=0, help='if value is provided will load steps') 66 | args = parser.parse_args() 67 | 68 | hparams = {'ret_fnames': True, 'load_T': args.load_steps,'action_mismatch': 3, 'state_mismatch': 3, 'splits':[0.8, 0.1, 0.1], 'same_cam_across_sub_batch':False} 69 | loader = AnnotationBenchmarkDataset(args.batch_size, args.path, hparams=hparams) 70 | print('modes are', loader.modes) 71 | 72 | tensors = [loader[x, args.mode] for x in ['images', 'states', 'actions', 'annotations', 'f_names']] 73 | s = tf.Session() 74 | out_tensors = s.run(tensors, feed_dict=loader.build_feed_dict(args.mode)) 75 | 76 | import imageio 77 | writer = imageio.get_writer('test_frames.gif') 78 | for t in range(out_tensors[0].shape[1]): 79 | writer.append_data((np.concatenate([b for b in out_tensors[0][:, t, 0]], axis=-2) * 255).astype(np.uint8)) 80 | writer.close() 81 | import pdb; pdb.set_trace() 82 | print('loaded tensors!') 83 | -------------------------------------------------------------------------------- /robonet/datasets/variants/val_filter_dataset_variants.py: -------------------------------------------------------------------------------- 1 | from robonet.datasets.robonet_dataset import RoboNetDataset 2 | from tensorflow.contrib.training.python.training.hparam import HParams 3 | from robonet.datasets.variants.annotation_benchmark_dataset import AnnotationBenchmarkDataset 4 | import pdb 5 | 6 | 7 | """ 8 | Should perhaps update these to work with new API 9 | """ 10 | class ValFilterDataset(RoboNetDataset): 11 | """ 12 | Separates files that have annotations and those which don't 13 | - files with annotations are loaded as validation files 14 | - all others are loaded as train/test 15 | """ 16 | 17 | def _split_files(self, metadata): 18 | train_metadata, val_metadata = 
self.train_val_filter(metadata, metadata) 19 | 20 | train_files, test_files, val_files = [], [], [] 21 | train_test_files = train_metadata.files 22 | val_files = val_metadata.files 23 | [self.rng.shuffle(files) for files in [train_test_files, val_files]] 24 | train_pivot = int(len(train_test_files) * self._hparams.splits[0]) 25 | if self._hparams.splits[0]: 26 | train_files = train_test_files[:train_pivot] 27 | if self._hparams.splits[1]: 28 | val_files = val_files 29 | if self._hparams.splits[2]: 30 | test_files = train_test_files[train_pivot:] 31 | return train_files, val_files, test_files 32 | 33 | def train_val_filter(self, train_metadata, val_metadata): 34 | """ 35 | :param metadata: 36 | :return: train_metadata, val_metadata 37 | """ 38 | raise NotImplementedError 39 | 40 | class HeldoutRobotDataset(ValFilterDataset): 41 | """ 42 | Use files from one held-out robot for testing and files from all other robots for training 43 | """ 44 | @staticmethod 45 | def _get_default_hparams(parent_hparams=None): 46 | if parent_hparams is None: 47 | parent_hparams = ValFilterDataset._get_default_hparams() 48 | parent_hparams.add_hparam('held_out_robot', '') 49 | return parent_hparams 50 | 51 | def train_val_filter(self, train_metadata, val_metadata): 52 | train_metadata = train_metadata[train_metadata['robot'] != self._hparams.held_out_robot] 53 | val_metadata = val_metadata[val_metadata['robot'] == self._hparams.held_out_robot] 54 | print('after filtering robots: number of trainfiles {} number of val files {}'.format(len(train_metadata.files), len(val_metadata.files))) 55 | return train_metadata, val_metadata 56 | 57 | 58 | class AnnotationHeldoutRobotDataset(HeldoutRobotDataset, AnnotationBenchmarkDataset): 59 | 60 | @staticmethod 61 | def _get_default_hparams(): 62 | combined_params = RoboNetDataset._get_default_hparams() 63 | combined_params = HeldoutRobotDataset._get_default_hparams(combined_params) 64 | combined_params = AnnotationBenchmarkDataset._get_default_hparams(combined_params) 65 | return combined_params 66 | 67 | def train_val_filter(self, train_metadata, val_metadata): 68 | print('before filtering: number of trainfiles {} number of val files {}'.format(len(train_metadata.files), len(val_metadata.files))) 69 | train_metadata, val_metadata = HeldoutRobotDataset.train_val_filter(self, train_metadata, val_metadata) 70 | train_metadata, val_metadata = AnnotationBenchmarkDataset.train_val_filter(self, train_metadata, val_metadata) 71 | return train_metadata, val_metadata -------------------------------------------------------------------------------- /robonet/inverse_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SudeepDasari/RoboNet/d83eee20f39653c3f8e7c349df7350e8a9e9f7a7/robonet/inverse_model/__init__.py -------------------------------------------------------------------------------- /robonet/inverse_model/models/__init__.py: -------------------------------------------------------------------------------- 1 | def get_models(class_name): 2 | if class_name == 'DeterministicInverseModel': 3 | from .deterministic_inverse_model import DeterministicInverseModel 4 | return DeterministicInverseModel 5 | if class_name == 'DiscretizedInverseModel': 6 | from .discretized_inverse_model import DiscretizedInverseModel 7 | return DiscretizedInverseModel 8 | raise NotImplementedError 9 | 10 | -------------------------------------------------------------------------------- 
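A minimal usage sketch of the factory above (hedged: `dataset_hparams` and the positional arguments are placeholders mirroring `BaseModel.__init__` further down, not a documented entry point):

    from robonet.inverse_model.models import get_models

    InverseModel = get_models('DeterministicInverseModel')     # or 'DiscretizedInverseModel'
    model = InverseModel(dataset_hparams, 1, 'lstm_baseline')  # loader hparams, n_gpus, registered graph type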
/robonet/inverse_model/models/base_inverse_model.py: -------------------------------------------------------------------------------- 1 | from robonet.video_prediction.models.base_model import BaseModel 2 | from robonet.inverse_model.models.graphs import get_graph_class 3 | 4 | 5 | class BaseInverseModel(BaseModel): 6 | def _get_graph(self, graph_type): 7 | return get_graph_class(graph_type) 8 | 9 | def _default_scope(self): 10 | return 'inverse_model' 11 | -------------------------------------------------------------------------------- /robonet/inverse_model/models/deterministic_inverse_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Boiled down version of SAVP model from https://github.com/alexlee-gk/video_prediction 3 | """ 4 | from robonet.inverse_model.models.base_inverse_model import BaseInverseModel 5 | from robonet.video_prediction.utils import tf_utils 6 | import tensorflow as tf 7 | from collections import OrderedDict 8 | from robonet.video_prediction import losses 9 | from robonet.video_prediction.utils import tf_utils 10 | 11 | 12 | class DeterministicInverseModel(BaseInverseModel): 13 | def _model_default_hparams(self): 14 | return { 15 | "lr": 0.001, 16 | "end_lr": 0.0, 17 | "beta1": 0.9, 18 | "beta2": 0.999, 19 | } 20 | 21 | def _model_fn(self, model_inputs, model_targets, mode): 22 | inputs, targets = {}, None 23 | inputs['start_images'] = model_inputs['images'][:, 0] 24 | inputs['goal_images'] = model_inputs['images'][:, -1] 25 | if mode == tf.estimator.ModeKeys.TRAIN: 26 | inputs['T'] = model_targets['actions'].get_shape().as_list()[1] 27 | inputs['adim'] = model_targets['actions'].get_shape().as_list()[2] 28 | inputs['real_actions'] = targets = model_targets['actions'] 29 | else: 30 | inputs['adim'] = model_inputs['adim'] 31 | inputs['T'] = model_inputs['T'] 32 | 33 | # build the graph 34 | self._model_graph = model_graph = self._graph_class() 35 | 36 | if self._num_gpus <= 1: 37 | outputs = model_graph.build_graph(mode, inputs, self._hparams, self._graph_scope) 38 | else: 39 | # TODO: add multi-gpu support 40 | raise NotImplementedError 41 | 42 | # train 43 | if mode == tf.estimator.ModeKeys.TRAIN: 44 | global_step = tf.train.get_or_create_global_step() 45 | lr, optimizer = tf_utils.build_optimizer(self._hparams.lr, self._hparams.beta1, self._hparams.beta2, global_step=global_step) 46 | loss = losses.l1_loss(targets, outputs['pred_actions']) 47 | 48 | print('computing gradient and train_op') 49 | g_train_op = optimizer.minimize(loss, global_step=global_step) 50 | 51 | est = tf.estimator.EstimatorSpec(mode, loss=loss, train_op=g_train_op) 52 | scalar_summaries = {} 53 | if 'ground_truth_sampling_mean' in outputs: 54 | scalar_summaries['ground_truth_sampling_mean'] = outputs['ground_truth_sampling_mean'] 55 | return est, scalar_summaries, {} 56 | 57 | #test 58 | return outputs['pred_actions'] 59 | 60 | -------------------------------------------------------------------------------- /robonet/inverse_model/models/discretized_inverse_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Boiled down version of SAVP model from https://github.com/alexlee-gk/video_prediction 3 | """ 4 | from robonet.inverse_model.models.base_inverse_model import BaseInverseModel 5 | from robonet.video_prediction.utils import tf_utils 6 | import tensorflow as tf 7 | from collections import OrderedDict 8 | from robonet.video_prediction import losses 9 | from robonet.video_prediction.utils import 
tf_utils 10 | 11 | 12 | def _binarize(actions, pivots): 13 | n_xy = (len(pivots[0]) + 1) * (len(pivots[1]) + 1) 14 | n_z = len(pivots[2]) + 1 15 | n_theta = len(pivots[3]) + 1 16 | 17 | B = actions.get_shape().as_list()[0] 18 | input_adim = actions.get_shape().as_list()[2] 19 | T = actions.get_shape().as_list()[1] 20 | 21 | assert input_adim == 4, "only supports [x,y,z,theta] action space for now!" 22 | assert len(pivots) == input_adim, "bad discretization pivots array!" 23 | binned_actions = [] 24 | for a in range(input_adim): 25 | binned_action = tf.zeros((B, T), dtype=tf.int32) 26 | for p in range(len(pivots[a])): 27 | pivot = pivots[a][p] 28 | binned_action = tf.where_v2(actions[:, :, a] > pivot, binned_action + 1, binned_action) 29 | binned_actions.append(binned_action) 30 | 31 | xy_act = binned_actions[0] + (len(pivots[0]) + 1) * binned_actions[1] 32 | z_act, theta_act = binned_actions[2], binned_actions[3] 33 | one_hot_actions = [tf.one_hot(tensor, n_dim) for tensor, n_dim in zip((xy_act, z_act, theta_act), (n_xy, n_z, n_theta))] 34 | return one_hot_actions 35 | 36 | 37 | class DiscretizedInverseModel(BaseInverseModel): 38 | def _model_default_hparams(self): 39 | return { 40 | "context_actions": 0, 41 | "lr": 0.001, 42 | "end_lr": 0.0, 43 | "beta1": 0.9, 44 | "beta2": 0.999, 45 | "pivots": [[-0.04483253140755173, -0.02947711320550581, -0.018373884708696702, -0.008892051974322548, -4.59881939272745e-05, 0.008815899693566963, 0.018292582474913204, 0.02938255920278165, 0.04470332342338521], 46 | [-0.044674549010427486, -0.029352782231283018, -0.018263887904468375, -0.008836470630237072, 7.81874877900302e-06, 0.00884825636063618, 0.01830693463003378, 0.029377939442953, 0.04473508111072804], 47 | [-0.10348141529525286, -0.06793363038544242, -0.042405628783200776, -0.02067683018449292, -0.0003540274691179853, 0.019988218195319766, 0.04168513725690283, 0.06726936589279635, 0.10260515613003221], 48 | [-0.22409500837108018, -0.1470685835529137, -0.09166876049855337, -0.04419968109806307, 5.580875190224738e-05, 0.044414223320168145, 0.09168509202611021, 0.1469321233733917, 0.2237400683241968]], 49 | "means": [[-0.05844043352506317, -0.0365598888753108, -0.02371854361080623, -0.01355633452537272, -0.004447061217304071, 0.004359558603466982, 0.01346781084244209, 0.02363783086130393, 0.036456939880113295, 0.05834560772861528], 50 | [-0.05831025927528526, -0.03643373528153938, -0.023609139710274608, -0.013465667182953755, -0.004399357688117235, 0.004405043570967748, 0.013491056349448851, 0.023632353969085647, 0.03646405448080863, 0.0583175660974888], 51 | [-0.14210753817324154, -0.08433897448430323, -0.054693763651882464, -0.03133158710778195, -0.010471111756616646, 0.00976338713468559, 0.030621494148596932, 0.05401384675615853, 0.08356642563278535, 0.1406814351222195], 52 | [-0.30673709675244115, -0.1826234528754964, -0.11831810064105407, -0.06747048133665953, -0.02199376800432712, 0.02209506703978301, 0.06762712392804507, 0.11832652238765545, 0.18242774553595653, 0.30635348910031857]] 53 | } 54 | 55 | def _model_fn(self, model_inputs, model_targets, mode): 56 | inputs = {} 57 | if self._hparams.context_actions: 58 | inputs['context_frames'] = model_inputs['images'][:, :self._hparams.context_actions] 59 | inputs['start_images'] = model_inputs['images'][:, self._hparams.context_actions] 60 | inputs['goal_images'] = model_inputs['images'][:, -1] 61 | 62 | n_xy = (len(self._hparams.pivots[0]) + 1) * (len(self._hparams.pivots[1]) + 1) 63 | n_z = len(self._hparams.pivots[2]) + 1 64 | n_theta = 
len(self._hparams.pivots[3]) + 1 65 | 66 | if mode == tf.estimator.ModeKeys.TRAIN: 67 | one_hot_actions = _binarize(model_targets['actions'], self._hparams.pivots) 68 | if self._hparams.context_actions: 69 | inputs['context_actions'] = tf.concat([x[:, :self._hparams.context_actions] for x in one_hot_actions], -1) 70 | real_pred_actions = [x[:, self._hparams.context_actions:] for x in one_hot_actions] 71 | inputs['real_actions'] = tf.concat(real_pred_actions, -1) 72 | inputs['T'] = model_targets['actions'].get_shape().as_list()[1] - self._hparams.context_actions 73 | else: 74 | assert model_inputs['adim'] == 4, "only supports [x,y,z,theta] action space for now!" 75 | inputs['T'] = model_inputs['T'] - self._hparams.context_actions 76 | if self._hparams.context_actions: 77 | one_hot_actions = _binarize(model_inputs['context_actions'], self._hparams.pivots) 78 | inputs['context_actions'] = tf.concat([x[:, :self._hparams.context_actions] for x in one_hot_actions], -1) 79 | 80 | inputs['adim'] = (len(self._hparams.pivots[0]) + 1) * (len(self._hparams.pivots[1]) + 1) + sum([len(arr) + 1 for arr in self._hparams.pivots[2:]]) 81 | 82 | # build the graph 83 | self._model_graph = model_graph = self._graph_class() 84 | if self._num_gpus <= 1: 85 | outputs = model_graph.build_graph(mode, inputs, self._hparams, self._graph_scope) 86 | else: 87 | # TODO: add multi-gpu support 88 | raise NotImplementedError 89 | 90 | # train 91 | if mode == tf.estimator.ModeKeys.TRAIN: 92 | global_step = tf.train.get_or_create_global_step() 93 | lr, optimizer = tf_utils.build_optimizer(self._hparams.lr, self._hparams.beta1, self._hparams.beta2, global_step=global_step) 94 | pred_xy = outputs['pred_actions'][:, :, :n_xy] 95 | pred_z = outputs['pred_actions'][:, :, n_xy:n_z + n_xy] 96 | pred_theta = outputs['pred_actions'][:, :, n_z + n_xy:] 97 | pred_one_hots = [pred_xy, pred_z, pred_theta] 98 | 99 | losses = [tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(real, pred)) for real, pred in zip(real_pred_actions, pred_one_hots)] 100 | loss = sum(losses) 101 | 102 | print('computing gradient and train_op') 103 | g_train_op = optimizer.minimize(loss, global_step=global_step) 104 | 105 | est = tf.estimator.EstimatorSpec(mode, loss=loss, train_op=g_train_op) 106 | scalar_summaries = {} 107 | if 'ground_truth_sampling_mean' in outputs: 108 | scalar_summaries['ground_truth_sampling_mean'] = outputs['ground_truth_sampling_mean'] 109 | 110 | for k, loss in zip(['xy_loss', 'z_loss', 'theta_loss'], losses): 111 | scalar_summaries[k] = loss 112 | return est, scalar_summaries, {} 113 | 114 | #test 115 | means = tf.convert_to_tensor(self._hparams.means) 116 | pred_xy = outputs['pred_actions'][:, :, :n_xy] 117 | pred_z = outputs['pred_actions'][:, :, n_xy:n_z + n_xy] 118 | pred_theta = outputs['pred_actions'][:, :, n_z + n_xy:] 119 | 120 | pred_xy = tf.reshape(tf.random.categorical(tf.reshape(pred_xy, (-1, n_xy)), 1, dtype=tf.int32), (-1, inputs['T'])) 121 | pred_x, pred_y = tf.mod(pred_xy, len(self._hparams.pivots[0]) + 1), tf.floordiv(pred_xy, len(self._hparams.pivots[0]) + 1) 122 | pred_z = tf.reshape(tf.random.categorical(tf.reshape(pred_z, (-1, n_z)), 1, dtype=tf.int32), (-1, inputs['T'])) 123 | pred_theta = tf.reshape(tf.random.categorical(tf.reshape(pred_theta, (-1, n_theta)), 1, dtype=tf.int32), (-1, inputs['T'])) 124 | 125 | outputs['pred_actions'] = tf.concat([tf.gather(means[i], indices)[:, :, None] for i, indices in 126 | enumerate([pred_x, pred_y, pred_z, pred_theta])], axis=-1) 127 | return outputs['pred_actions'] 
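    # Hedged worked example of the discretization above (numbers follow the default pivots):
    # each of the 4 action dimensions has 9 pivots and therefore 10 bins, so n_xy = 10 * 10 = 100,
    # n_z = 10, n_theta = 10, and the one-hot action vector has 100 + 10 + 10 = 120 entries (the
    # 'adim' handed to the graph). A delta of x = +0.02 exceeds 7 of the x pivots (bin 7) while
    # y = -0.05 exceeds none of the y pivots (bin 0), giving joint xy class 7 + 10 * 0 = 7. At test
    # time each group is sampled with tf.random.categorical and mapped back to a continuous action
    # through the matching entry of the 'means' table.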
128 | -------------------------------------------------------------------------------- /robonet/inverse_model/models/graphs/__init__.py: -------------------------------------------------------------------------------- 1 | def get_graph_class(class_name): 2 | if class_name == 'lstm_baseline': 3 | from .lstm_baseline import LSTMBaseline 4 | return LSTMBaseline 5 | else: 6 | raise NotImplementedError 7 | -------------------------------------------------------------------------------- /robonet/inverse_model/models/graphs/base_graph.py: -------------------------------------------------------------------------------- 1 | from robonet.video_prediction.models.graphs.base_graph import BaseGraph as BaseVpredGraph 2 | import tensorflow as tf 3 | 4 | 5 | class BaseGraph(BaseVpredGraph): 6 | @staticmethod 7 | def default_hparams(): 8 | return { 9 | } 10 | -------------------------------------------------------------------------------- /robonet/inverse_model/models/graphs/lstm_baseline.py: -------------------------------------------------------------------------------- 1 | from robonet.inverse_model.models.graphs.base_graph import BaseGraph 2 | import itertools 3 | import tensorflow as tf 4 | import tensorflow.keras.layers as layers 5 | from robonet.inverse_model.models.layers.vgg_pretrain import get_vgg_dict, vgg_preprocess_images, vgg_conv, vgg_pool 6 | 7 | 8 | class ImageEncoder(tf.Module): 9 | def __init__(self, conv_filters, kernel_size, out_dim, vgg_path, n_convs=3, padding='same', fc_layer=256): 10 | self._vgg_dict = get_vgg_dict(vgg_path) 11 | 12 | self._convs = [[layers.Conv2D(conv_filters, kernel_size, padding="same", dilation_rate=min(c + 1, 3)), 13 | layers.BatchNormalization(axis=-1)] for c in range(n_convs)] 14 | 15 | # top layer 16 | self._fc_layer = [layers.Dense(fc_layer), layers.BatchNormalization(axis=-1)] 17 | self._top = [layers.Dense(out_dim), layers.BatchNormalization(axis=-1)] 18 | 19 | def __call__(self, input_img, training=True): 20 | preprocessed = vgg_preprocess_images(input_img) 21 | conv1_out = vgg_conv(self._vgg_dict, vgg_conv(self._vgg_dict, preprocessed, "conv1_1"), "conv1_2") 22 | conv1_out = vgg_pool(conv1_out, "pool1") 23 | 24 | conv2_out = vgg_conv(self._vgg_dict, vgg_conv(self._vgg_dict, conv1_out, "conv2_1"), "conv2_2") 25 | conv2_out = vgg_pool(conv2_out, "pool2") 26 | 27 | conv3_out = conv2_out 28 | for c in ['conv3_1', 'conv3_2', 'conv3_3', 'conv3_4']: 29 | conv3_out = vgg_conv(self._vgg_dict, conv3_out, c) 30 | conv3_out = vgg_pool(conv3_out, "pool3") 31 | 32 | conv4_out = conv3_out 33 | for c in ['conv4_1', 'conv4_2', 'conv4_3', 'conv4_4']: 34 | conv4_out = vgg_conv(self._vgg_dict, conv4_out, c) 35 | conv4_out = vgg_pool(conv4_out, "pool4") 36 | 37 | top = vgg_conv(self._vgg_dict, conv4_out, "conv5_1") 38 | for layer in self._convs: 39 | conv, norm = layer 40 | top = norm(tf.nn.relu(conv(top))) + top 41 | 42 | top = tf.nn.max_pool(top, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='top_pool') 43 | dense, norm = self._fc_layer 44 | top = norm(tf.nn.relu(dense(tf.reshape(top, (top.get_shape().as_list()[0], -1))))) 45 | 46 | dense, norm = self._top 47 | return norm(tf.nn.relu(dense(top))) 48 | 49 | 50 | class LSTMBaseline(BaseGraph): 51 | def build_graph(self, mode, inputs, hparams, scope_name='flow_generator'): 52 | is_train = mode == tf.estimator.ModeKeys.TRAIN 53 | B = inputs['start_images'].get_shape().as_list()[0] 54 | self._scope_name = scope_name 55 | outputs = {} 56 | with tf.variable_scope(scope_name): 57 | encoder = 
ImageEncoder(hparams.conv_filters, hparams.kernel_size, hparams.enc_dim, hparams.vgg_path, hparams.n_convs) 58 | start_enc = encoder(inputs['start_images'], training=is_train) 59 | goal_enc = encoder(inputs['goal_images'], training=is_train) 60 | start_goal_enc = tf.concat((start_enc, goal_enc), -1) 61 | 62 | lstm_in = layers.Dense(hparams.latent_dim * inputs['T'])(start_goal_enc) 63 | lstm_in = layers.BatchNormalization(axis=-1)(tf.nn.relu(lstm_in), training=is_train) 64 | lstm_in = tf.reshape(lstm_in, (-1, inputs['T'], hparams.latent_dim)) 65 | 66 | lstm_dim = hparams.latent_dim 67 | if hparams.append_last_action: 68 | lstm_dim += + inputs['adim'] 69 | 70 | lstm = layers.LSTM(lstm_dim) 71 | lstm.cell.build([B, inputs['T'], lstm_dim]) 72 | 73 | if 'context_actions' in inputs: 74 | last_action = inputs['context_actions'][:, -1] 75 | else: 76 | last_action = tf.zeros((B, inputs['adim'])) 77 | 78 | action_predictions = [] 79 | top_layer = layers.Dense(inputs['adim']) 80 | schedule_sample = self.schedule_sample(inputs['T'], B, hparams) 81 | for t in range(inputs['T']): 82 | if hparams.append_last_action: 83 | if t > 0 and is_train: 84 | real_action = inputs['real_actions'][:, t - 1] 85 | last_action = tf.where(schedule_sample[t - 1], real_action, action_predictions[-1][:, 0]) 86 | elif t > 0: 87 | last_action = action_predictions[-1][:, 0] 88 | in_t = tf.concat([lstm_in[:, t], last_action], axis=-1) 89 | else: 90 | in_t = lstm_in[:, t] 91 | 92 | if t == 0: 93 | if 'context_frames' in inputs: 94 | assert hparams.append_last_action 95 | context_encodings = [encoder(inputs['context_frames'][:, c]) for c in range(hparams.context_actions)] 96 | 97 | for i, c in enumerate(context_encodings): 98 | dense = layers.Dense(hparams.latent_dim)(c) 99 | context_enc = layers.BatchNormalization(axis=-1)(tf.nn.relu(dense), training=is_train) 100 | if i == 0: 101 | context_act = tf.zeros_like(inputs['context_actions'][:, 0]) 102 | else: 103 | context_act = inputs['context_actions'][:, i - 1] 104 | 105 | context_in = tf.concat((context_enc, context_act), axis=-1) 106 | if i == 0: 107 | hidden_state = lstm.get_initial_state(context_in[:, None]) 108 | _, hidden_state = lstm.cell(context_in, hidden_state) 109 | else: 110 | hidden_state = lstm.get_initial_state(in_t[:, None]) 111 | 112 | lstm_out, hidden_state = lstm.cell(in_t, hidden_state) 113 | action_predictions.append(top_layer(lstm_out)[:, None]) 114 | 115 | 116 | outputs['pred_actions'] = tf.concat(action_predictions, axis=1) 117 | if hparams.append_last_action and inputs['T'] > 1: 118 | outputs['ground_truth_sampling_mean'] = tf.reduce_mean(tf.to_float(schedule_sample)) 119 | 120 | return outputs 121 | 122 | @staticmethod 123 | def default_hparams(): 124 | default_params = { 125 | "n_convs": 3, 126 | "conv_filters": 512, 127 | "enc_dim": 128, 128 | "kernel_size": 3, 129 | 130 | "latent_dim": 20, 131 | 132 | "vgg_path": '~/', 133 | "append_last_action": True, 134 | "schedule_sampling_k": 900.0, 135 | "schedule_sampling_steps": [0, 100000], 136 | } 137 | return dict(itertools.chain(BaseGraph.default_hparams().items(), default_params.items())) 138 | 139 | def schedule_sample(self, T, B, hparams): 140 | if T == 1: 141 | return 142 | 143 | ground_truth_sampling_shape = [T - 1, B] 144 | 145 | k = hparams.schedule_sampling_k 146 | start_step = hparams.schedule_sampling_steps[0] 147 | iter_num = tf.to_float(tf.train.get_or_create_global_step()) 148 | prob = (k / (k + tf.exp((iter_num - start_step) / k))) 149 | prob = tf.cond(tf.less(iter_num, start_step), lambda: 
1.0, lambda: prob) 150 | 151 | log_probs = tf.log([1 - prob, prob]) 152 | ground_truth_sampling = tf.multinomial([log_probs] * B, ground_truth_sampling_shape[0]) 153 | ground_truth_sampling = tf.cast(tf.transpose(ground_truth_sampling, [1, 0]), dtype=tf.bool) 154 | # Ensure that eventually, the model is deterministically 155 | # autoregressive (as opposed to autoregressive with very high probability). 156 | ground_truth_sampling = tf.cond(tf.less(prob, 0.001), 157 | lambda: tf.constant(False, dtype=tf.bool, shape=ground_truth_sampling_shape), 158 | lambda: ground_truth_sampling) 159 | return ground_truth_sampling -------------------------------------------------------------------------------- /robonet/inverse_model/models/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SudeepDasari/RoboNet/d83eee20f39653c3f8e7c349df7350e8a9e9f7a7/robonet/inverse_model/models/layers/__init__.py -------------------------------------------------------------------------------- /robonet/inverse_model/models/layers/vgg_pretrain.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | 5 | 6 | def get_vgg_dict(path): 7 | return np.load(os.path.join(path, "vgg19.npy"), encoding='latin1', allow_pickle=True).item() 8 | 9 | 10 | def vgg_preprocess_images(image_tensor): 11 | """ 12 | :param image_tensor: float 32 array of Batch x Height x Width x Channel immages (range 0 - 1) 13 | :return: pre-processed images (ready to input to VGG) 14 | """ 15 | vgg_mean = tf.convert_to_tensor(np.array([103.939, 116.779, 123.68], dtype=np.float32)) 16 | red, green, blue = tf.split(axis=-1, num_or_size_splits=3, value=image_tensor * 255) 17 | 18 | return tf.concat(axis=3, values=[ 19 | blue - vgg_mean[0], 20 | green - vgg_mean[1], 21 | red - vgg_mean[2], 22 | ]) 23 | 24 | 25 | def vgg_conv(vgg_dict, bottom, name): 26 | with tf.variable_scope(name, reuse=True): 27 | filt = tf.constant(vgg_dict[name][0], name="filter") 28 | 29 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 30 | 31 | conv_biases = tf.constant(vgg_dict[name][1], name="biases") 32 | bias = tf.nn.bias_add(conv, conv_biases) 33 | 34 | relu = tf.nn.relu(bias) 35 | return relu 36 | 37 | 38 | def vgg_pool(bottom, name): 39 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 40 | -------------------------------------------------------------------------------- /robonet/inverse_model/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SudeepDasari/RoboNet/d83eee20f39653c3f8e7c349df7350e8a9e9f7a7/robonet/inverse_model/testing/__init__.py -------------------------------------------------------------------------------- /robonet/inverse_model/testing/action_inference_interface.py: -------------------------------------------------------------------------------- 1 | import ray 2 | from robonet.inverse_model.models import get_models 3 | import numpy as np 4 | from robonet.video_prediction.utils import tf_utils 5 | import tensorflow as tf 6 | from tensorflow.contrib.training import HParams 7 | import os 8 | import glob 9 | import math 10 | import yaml 11 | 12 | 13 | class ActionInferenceInterface(object): 14 | def __init__(self, model_path, test_hparams={}, n_gpus=1, first_gpu=0, sess=None): 15 | assert n_gpus == 1, "multi gpu evaluation not yet written" 16 | 
assert first_gpu == 0, "only starts building at gpu0" 17 | 18 | self._test_hparams = self._default_hparams().override_from_dict(test_hparams) 19 | self._model_path = os.path.expanduser(model_path) 20 | 21 | config_path = self._model_path + '/params.yaml' 22 | assert os.path.exists(config_path), 'Config path does not exist!' 23 | 24 | with open(config_path) as config: 25 | params = yaml.load(config, Loader=yaml.SafeLoader) 26 | self._model_hparams = params['model'] 27 | self._input_hparams = params['dataset'] 28 | 29 | # ensure vgg weights are restored correctly (a bit hacky for now) 30 | self._model_hparams['vgg_path'] = os.path.expanduser(self._test_hparams.vgg_path) 31 | 32 | print('\n\n------------------------------------ LOADED PARAMS ------------------------------------') 33 | for k, v in self._model_hparams.items(): 34 | print('{} --> {}'.format(k, v)) 35 | for k, v in self._input_hparams.items(): 36 | print('{} --> {}'.format(k, v)) 37 | print('---------------------------------------------------------------------------------------\n\n') 38 | 39 | InverseModel = get_models(self._model_hparams.pop('model')) 40 | self._model = model = InverseModel(self._input_hparams, n_gpus, self._model_hparams['graph_type'], False, self._model_hparams.pop('scope_name')) 41 | inputs, targets = self._build_input_targets() 42 | self._pred_act= model.model_fn(inputs, targets, tf.estimator.ModeKeys.PREDICT, self._model_hparams) 43 | 44 | self._sess = sess 45 | self._restored = False 46 | 47 | def _default_hparams(self): 48 | default_dict = { 49 | "run_batch_size": 1, 50 | "vgg_path": "~/" # vgg19.npy should be in vgg_path folder (aka vgg_path = /path/to/folder/containing/weights/) 51 | } 52 | return HParams(**default_dict) 53 | 54 | def _build_input_targets(self): 55 | n_context = self._model_hparams.get('context_actions', 0) 56 | height, width = self._input_hparams['img_size'] 57 | self._images_pl = tf.placeholder(tf.float32, [self._test_hparams.run_batch_size, 2 + n_context, height, width, 3]) 58 | pl_dict = {'adim': self._input_hparams['target_adim'], 'T': self._input_hparams['load_T'] - 1, 'images': self._images_pl} 59 | 60 | if n_context: 61 | self._context_pl = tf.placeholder(tf.float32, [self._test_hparams.run_batch_size, self._model_hparams['context_actions'], 62 | self._input_hparams['target_adim']]) 63 | pl_dict['context_actions'] = self._context_pl 64 | 65 | return pl_dict, {} 66 | 67 | def predict(self, start_image, goal_image, context_actions=None, context_frames=None): 68 | assert self._restored 69 | start_goal_image = np.concatenate((start_image[None, None], goal_image[None, None]), axis=1) 70 | fd = {self._images_pl: start_goal_image} 71 | if self._model_hparams.get('context_actions', 0): 72 | fd[self._images_pl] = np.concatenate((context_frames, start_goal_image), axis=1) 73 | fd[self._context_pl] = context_actions 74 | return self._sess.run(self._pred_act, feed_dict=fd) 75 | 76 | def __call__(self, start_image, goal_image, context_actions=None, context_frames=None): 77 | return self.predict(start_image, goal_image, context_actions, context_frames) 78 | 79 | def set_session(self, sess): 80 | self._sess = sess 81 | 82 | def restore(self): 83 | if self._sess is None: 84 | self._sess = tf.Session() 85 | self._sess.run(tf.global_variables_initializer()) 86 | 87 | model_paths = glob.glob('{}/model-*'.format(self._model_path)) 88 | max_model = max([int(m.split('.')[0].split('-')[-1]) for m in model_paths]) 89 | restore_path = os.path.join(self._model_path, 'model-' + str(max_model)) 90 | 
print('restoring', restore_path) 91 | 92 | checkpoints = [restore_path] 93 | # automatically skip global_step if more than one checkpoint is provided 94 | skip_global_step = len(checkpoints) > 1 95 | savers = [] 96 | for checkpoint in checkpoints: 97 | print("creating restore saver from checkpoint %s" % checkpoint) 98 | saver, _ = tf_utils.get_checkpoint_restore_saver(checkpoint, skip_global_step=skip_global_step) 99 | savers.append(saver) 100 | restore_op = [saver.saver_def.restore_op_name for saver in savers] 101 | self._sess.run(restore_op) 102 | self._restored = True 103 | 104 | @property 105 | def horizon(self): 106 | return self._input_hparams['load_T'] - 1 107 | 108 | @property 109 | def context_actions(self): 110 | return self._model_hparams.get('context_actions', 0) 111 | -------------------------------------------------------------------------------- /robonet/inverse_model/training/__init__.py: -------------------------------------------------------------------------------- 1 | def get_trainable(name): 2 | if name == 'InverseTrainable': 3 | from .inverse_trainable import InverseTrainable 4 | return InverseTrainable 5 | raise NotImplementedError 6 | 7 | -------------------------------------------------------------------------------- /robonet/inverse_model/training/inverse_trainable.py: -------------------------------------------------------------------------------- 1 | from robonet.video_prediction.training.trainable_interface import VPredTrainable 2 | from robonet.inverse_model.models import get_models 3 | import time 4 | from tensorflow.contrib.training import HParams 5 | from robonet.datasets.util.tensor_multiplexer import MultiplexedTensors 6 | 7 | 8 | class InverseTrainable(VPredTrainable): 9 | def _get_model_class(self, model_name): 10 | return get_models(model_name) 11 | 12 | def _default_hparams(self): 13 | default_dict = { 14 | 'batch_size': 16, 15 | 'restore_dir': '', 16 | 'n_gpus': 1, 17 | 'scalar_summary_freq': 100, 18 | 'train_fraction': 0.9, 19 | 'val_fraction': 0.05, 20 | 'max_to_keep': 3, 21 | 'max_steps': 300000, 22 | 'tf_log_flush_freq': 500 23 | } 24 | return HParams(**default_dict) 25 | 26 | def _get_input_targets(self, DatasetClass, metadata, dataset_hparams): 27 | data_loader = DatasetClass(self._hparams.batch_size, metadata, dataset_hparams) 28 | 29 | tensor_names = ['actions', 'images', 'states'] 30 | if 'annotations' in data_loader: 31 | tensor_names = ['actions', 'images', 'states', 'annotations'] 32 | 33 | self._tensor_multiplexer = MultiplexedTensors(data_loader, tensor_names) 34 | loaded_tensors = [self._tensor_multiplexer[k] for k in tensor_names] 35 | 36 | self._real_annotations = None 37 | assert loaded_tensors[1].get_shape().as_list()[2] == 1, "loader assumes one (potentially random) camera will be loaded in each example!" 
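    # shape note: the loader emits images as (batch, time, n_cams, height, width, channels);
    # the assert above pins n_cams to 1, and the slice on the next line drops that camera axis
    # so the model sees (batch, time, height, width, channels).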
38 | self._real_images = loaded_tensors[1] = loaded_tensors[1][:, :, 0] # grab cam 0 for images 39 | if 'annotations' in data_loader: 40 | self._real_annotations = loaded_tensors[3] = loaded_tensors[3][:, :, 0] # grab cam 0 for annotations 41 | 42 | inputs, targets = {}, {'actions': loaded_tensors[0]} 43 | for k, v in zip(tensor_names[1:], loaded_tensors[1:]): 44 | inputs[k] = v 45 | 46 | self._data_loader = data_loader 47 | return inputs, targets 48 | 49 | def _train(self): 50 | itr = self.iteration 51 | 52 | # no need to increment itr since global step is incremented by train_op 53 | loss, train_op = self._estimator.loss, self._estimator.train_op 54 | fetches = {'global_step': itr} 55 | 56 | start = time.time() 57 | train_loss = self.sess.run([loss, train_op], feed_dict=self._tensor_multiplexer.get_feed_dict('train'))[0] 58 | fetches['metric/step_time'] = time.time() - start 59 | fetches['metric/loss/train'] = train_loss 60 | 61 | if itr % self._hparams.scalar_summary_freq == 0: 62 | fetches['metric/loss/val'] = self.sess.run(loss, feed_dict=self._tensor_multiplexer.get_feed_dict('val')) 63 | for name in ['train', 'val']: 64 | metrics = self.sess.run(self._scalar_metrics, feed_dict=self._tensor_multiplexer.get_feed_dict(name)) 65 | for key, value in metrics.items(): 66 | fetches['metric/{}/{}'.format(key, name)] = value 67 | 68 | fetches['done'] = itr >= self._hparams.max_steps 69 | 70 | self._tf_log(fetches) 71 | 72 | return fetches 73 | -------------------------------------------------------------------------------- /robonet/video_prediction/__init__.py: -------------------------------------------------------------------------------- 1 | from . import losses 2 | from . import metrics 3 | from . import ops 4 | -------------------------------------------------------------------------------- /robonet/video_prediction/flow_ops.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def image_warp(im, flow): 5 | """Performs a backward warp of an image using the predicted flow. 6 | 7 | Args: 8 | im: Batch of images. [num_batch, height, width, channels] 9 | flow: Batch of flow vectors. [num_batch, height, width, 2] 10 | Returns: 11 | warped: transformed image of the same shape as the input image. 12 | 13 | Implementation taken from here: https://github.com/simonmeister/UnFlow 14 | 15 | maybe swap to # tf.contrib.image.dense_image_warp 16 | """ 17 | with tf.variable_scope('image_warp'): 18 | 19 | num_batch, height, width, channels = tf.unstack(tf.shape(im)) 20 | max_x = tf.cast(width - 1, 'int32') 21 | max_y = tf.cast(height - 1, 'int32') 22 | zero = tf.zeros([], dtype='int32') 23 | 24 | # We have to flatten our tensors to vectorize the interpolation 25 | im_flat = tf.reshape(im, [-1, channels]) 26 | flow_flat = tf.reshape(flow, [-1, 2]) 27 | 28 | # Floor the flow, as the final indices are integers 29 | # The fractional part is used to control the bilinear interpolation. 
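        # Worked example of the corner weights computed below: a flow of (2.3, -0.7) floors to (2, -1)
        # with fractional parts xw = 0.3, yw = 0.3, so wa = 0.7 * 0.7 = 0.49, wb = 0.7 * 0.3 = 0.21,
        # wc = 0.3 * 0.7 = 0.21 and wd = 0.3 * 0.3 = 0.09; the weights always sum to 1, making the
        # warped value a convex combination of the four neighbouring pixels.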
30 | flow_floor = tf.to_int32(tf.floor(flow_flat)) 31 | bilinear_weights = flow_flat - tf.floor(flow_flat) 32 | 33 | # Construct base indices which are displaced with the flow 34 | pos_x = tf.tile(tf.range(width), [height * num_batch]) 35 | grid_y = tf.tile(tf.expand_dims(tf.range(height), 1), [1, width]) 36 | pos_y = tf.tile(tf.reshape(grid_y, [-1]), [num_batch]) 37 | 38 | x = flow_floor[:, 0] 39 | y = flow_floor[:, 1] 40 | xw = bilinear_weights[:, 0] 41 | yw = bilinear_weights[:, 1] 42 | 43 | # Compute interpolation weights for 4 adjacent pixels 44 | # expand to num_batch * height * width x 1 for broadcasting in add_n below 45 | wa = tf.expand_dims((1 - xw) * (1 - yw), 1) # top left pixel 46 | wb = tf.expand_dims((1 - xw) * yw, 1) # bottom left pixel 47 | wc = tf.expand_dims(xw * (1 - yw), 1) # top right pixel 48 | wd = tf.expand_dims(xw * yw, 1) # bottom right pixel 49 | 50 | x0 = pos_x + x 51 | x1 = x0 + 1 52 | y0 = pos_y + y 53 | y1 = y0 + 1 54 | 55 | x0 = tf.clip_by_value(x0, zero, max_x) 56 | x1 = tf.clip_by_value(x1, zero, max_x) 57 | y0 = tf.clip_by_value(y0, zero, max_y) 58 | y1 = tf.clip_by_value(y1, zero, max_y) 59 | 60 | dim1 = width * height 61 | batch_offsets = tf.range(num_batch) * dim1 62 | base_grid = tf.tile(tf.expand_dims(batch_offsets, 1), [1, dim1]) 63 | base = tf.reshape(base_grid, [-1]) 64 | 65 | base_y0 = base + y0 * width 66 | base_y1 = base + y1 * width 67 | idx_a = base_y0 + x0 68 | idx_b = base_y1 + x0 69 | idx_c = base_y0 + x1 70 | idx_d = base_y1 + x1 71 | 72 | Ia = tf.gather(im_flat, idx_a) 73 | Ib = tf.gather(im_flat, idx_b) 74 | Ic = tf.gather(im_flat, idx_c) 75 | Id = tf.gather(im_flat, idx_d) 76 | 77 | warped_flat = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) 78 | warped = tf.reshape(warped_flat, [num_batch, height, width, channels]) 79 | warped.set_shape(im.shape) 80 | 81 | return warped 82 | -------------------------------------------------------------------------------- /robonet/video_prediction/functional_ops.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.eager import context 2 | from tensorflow.python.framework import constant_op 3 | from tensorflow.python.framework import ops 4 | from tensorflow.python.ops import array_ops 5 | from tensorflow.python.ops import control_flow_ops 6 | from tensorflow.python.ops import tensor_array_ops 7 | from tensorflow.python.ops import variable_scope as vs 8 | from tensorflow.python.util import nest 9 | 10 | 11 | def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, 12 | swap_memory=False, name=None): 13 | """ 14 | Same as tf.foldl but with support for a possibly nested sequence of tensors. 
15 | """ 16 | if not callable(fn): 17 | raise TypeError("fn must be callable.") 18 | 19 | input_is_sequence = nest.is_sequence(elems) 20 | input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x] 21 | def input_pack(x): 22 | return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0] 23 | 24 | if initializer is None: 25 | output_is_sequence = input_is_sequence 26 | output_flatten = input_flatten 27 | output_pack = input_pack 28 | else: 29 | output_is_sequence = nest.is_sequence(initializer) 30 | output_flatten = lambda x: nest.flatten(x) if output_is_sequence else [x] 31 | def output_pack(x): 32 | return (nest.pack_sequence_as(initializer, x) 33 | if output_is_sequence else x[0]) 34 | 35 | elems_flat = input_flatten(elems) 36 | 37 | in_graph_mode = context.in_graph_mode() 38 | with ops.name_scope(name, "foldl", [elems]): 39 | # TODO(akshayka): Remove the in_graph_mode check once caching devices are 40 | # supported in Eager 41 | if in_graph_mode: 42 | # Any get_variable calls in fn will cache the first call locally 43 | # and not issue repeated network I/O requests for each iteration. 44 | varscope = vs.get_variable_scope() 45 | varscope_caching_device_was_none = False 46 | if varscope.caching_device is None: 47 | # TODO(ebrevdo): Change to using colocate_with here and in other 48 | # methods. 49 | varscope.set_caching_device(lambda op: op.device) 50 | varscope_caching_device_was_none = True 51 | 52 | # Convert elems to tensor array. 53 | elems_flat = [ 54 | ops.convert_to_tensor(elem, name="elem") for elem in elems_flat] 55 | 56 | n = array_ops.shape(elems_flat[0])[0] 57 | 58 | # TensorArrays are always flat 59 | elems_ta = [ 60 | tensor_array_ops.TensorArray(dtype=elem.dtype, size=n, 61 | dynamic_size=False, 62 | infer_shape=True) 63 | for elem in elems_flat] 64 | # Unpack elements 65 | elems_ta = [ 66 | elem_ta.unstack(elem) for elem_ta, elem in zip(elems_ta, elems_flat)] 67 | 68 | if initializer is None: 69 | a_flat = [elem.read(0) for elem in elems_ta] 70 | i = constant_op.constant(1) 71 | else: 72 | initializer_flat = output_flatten(initializer) 73 | a_flat = [ops.convert_to_tensor(init) for init in initializer_flat] 74 | i = constant_op.constant(0) 75 | 76 | def compute(i, a_flat): 77 | packed_elems = input_pack([elem_ta.read(i) for elem_ta in elems_ta]) 78 | packed_a = output_pack(a_flat) 79 | a_out = fn(packed_a, packed_elems) 80 | nest.assert_same_structure( 81 | elems if initializer is None else initializer, a_out) 82 | flat_a_out = output_flatten(a_out) 83 | return (i + 1, flat_a_out) 84 | 85 | _, r_a = control_flow_ops.while_loop( 86 | lambda i, a: i < n, compute, (i, a_flat), 87 | parallel_iterations=parallel_iterations, 88 | back_prop=back_prop, 89 | swap_memory=swap_memory) 90 | 91 | # TODO(akshayka): Remove the in_graph_mode check once caching devices are 92 | # supported in Eager 93 | if in_graph_mode and varscope_caching_device_was_none: 94 | varscope.set_caching_device(None) 95 | 96 | return output_pack(r_a) 97 | -------------------------------------------------------------------------------- /robonet/video_prediction/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .normalization import fused_instance_norm 2 | -------------------------------------------------------------------------------- /robonet/video_prediction/layers/encoder_layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from robonet.video_prediction.ops 
import lrelu, dense, conv2d, pool2d, get_norm_layer 3 | 4 | 5 | def create_n_layer_encoder(inputs, 6 | nz=8, 7 | nef=64, 8 | n_layers=3, 9 | norm_layer='instance', 10 | stochastic=True): 11 | norm_layer = get_norm_layer(norm_layer) 12 | layers = [] 13 | paddings = [[0, 0], [1, 1], [1, 1], [0, 0]] 14 | 15 | with tf.variable_scope("layer_1"): 16 | convolved = conv2d(tf.pad(inputs, paddings), nef, kernel_size=4, strides=2, padding='VALID') 17 | rectified = lrelu(convolved, 0.2) 18 | layers.append(rectified) 19 | 20 | for i in range(1, n_layers): 21 | with tf.variable_scope("layer_%d" % (len(layers) + 1)): 22 | out_channels = nef * min(2**i, 4) 23 | convolved = conv2d(tf.pad(layers[-1], paddings), out_channels, kernel_size=4, strides=2, padding='VALID') 24 | normalized = norm_layer(convolved) 25 | rectified = lrelu(normalized, 0.2) 26 | layers.append(rectified) 27 | 28 | pooled = pool2d(rectified, rectified.shape[1:3].as_list(), padding='VALID', pool_mode='avg') 29 | squeezed = tf.squeeze(pooled, [1, 2]) 30 | 31 | if stochastic: 32 | with tf.variable_scope('z_mu'): 33 | z_mu = dense(squeezed, nz) 34 | with tf.variable_scope('z_log_sigma_sq'): 35 | z_log_sigma_sq = dense(squeezed, nz) 36 | z_log_sigma_sq = tf.clip_by_value(z_log_sigma_sq, -10, 10) 37 | outputs = {'enc_zs_mu': z_mu, 'enc_zs_log_sigma_sq': z_log_sigma_sq} 38 | else: 39 | outputs = squeezed 40 | return outputs -------------------------------------------------------------------------------- /robonet/video_prediction/layers/normalization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================= 15 | """Contains the normalization layer classes and their functional aliases.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | 21 | from tensorflow.contrib.framework.python.ops import variables 22 | from tensorflow.contrib.layers.python.layers import utils 23 | from tensorflow.python.framework import ops 24 | from tensorflow.python.ops import array_ops 25 | from tensorflow.python.ops import init_ops 26 | from tensorflow.python.ops import nn 27 | from tensorflow.python.ops import variable_scope 28 | 29 | 30 | DATA_FORMAT_NCHW = 'NCHW' 31 | DATA_FORMAT_NHWC = 'NHWC' 32 | 33 | 34 | def fused_instance_norm(inputs, 35 | center=True, 36 | scale=True, 37 | epsilon=1e-6, 38 | activation_fn=None, 39 | param_initializers=None, 40 | reuse=None, 41 | variables_collections=None, 42 | outputs_collections=None, 43 | trainable=True, 44 | data_format=DATA_FORMAT_NHWC, 45 | scope=None): 46 | """Functional interface for the instance normalization layer. 47 | 48 | Reference: https://arxiv.org/abs/1607.08022. 
49 | 50 | "Instance Normalization: The Missing Ingredient for Fast Stylization" 51 | Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky 52 | 53 | Args: 54 | inputs: A tensor with 2 or more dimensions, where the first dimension has 55 | `batch_size`. The normalization is over all but the last dimension if 56 | `data_format` is `NHWC` and the second dimension if `data_format` is 57 | `NCHW`. 58 | center: If True, add offset of `beta` to normalized tensor. If False, `beta` 59 | is ignored. 60 | scale: If True, multiply by `gamma`. If False, `gamma` is 61 | not used. When the next layer is linear (also e.g. `nn.relu`), this can be 62 | disabled since the scaling can be done by the next layer. 63 | epsilon: Small float added to variance to avoid dividing by zero. 64 | activation_fn: Activation function, default set to None to skip it and 65 | maintain a linear activation. 66 | param_initializers: Optional initializers for beta, gamma, moving mean and 67 | moving variance. 68 | reuse: Whether or not the layer and its variables should be reused. To be 69 | able to reuse the layer scope must be given. 70 | variables_collections: Optional collections for the variables. 71 | outputs_collections: Collections to add the outputs. 72 | trainable: If `True` also add variables to the graph collection 73 | `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). 74 | data_format: A string. `NHWC` (default) and `NCHW` are supported. 75 | scope: Optional scope for `variable_scope`. 76 | 77 | Returns: 78 | A `Tensor` representing the output of the operation. 79 | 80 | Raises: 81 | ValueError: If `data_format` is neither `NHWC` nor `NCHW`. 82 | ValueError: If the rank of `inputs` is undefined. 83 | ValueError: If rank or channels dimension of `inputs` is undefined. 84 | """ 85 | inputs = ops.convert_to_tensor(inputs) 86 | inputs_shape = inputs.shape 87 | inputs_rank = inputs.shape.ndims 88 | 89 | if inputs_rank is None: 90 | raise ValueError('Inputs %s has undefined rank.' % inputs.name) 91 | if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): 92 | raise ValueError('data_format has to be either NCHW or NHWC.') 93 | 94 | with variable_scope.variable_scope( 95 | scope, 'InstanceNorm', [inputs], reuse=reuse) as sc: 96 | if data_format == DATA_FORMAT_NCHW: 97 | reduction_axis = 1 98 | # For NCHW format, rather than relying on implicit broadcasting, we 99 | # explicitly reshape the params to params_shape_broadcast when computing 100 | # the moments and the batch normalization. 101 | params_shape_broadcast = list( 102 | [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)]) 103 | else: 104 | reduction_axis = inputs_rank - 1 105 | params_shape_broadcast = None 106 | moments_axes = list(range(inputs_rank)) 107 | del moments_axes[reduction_axis] 108 | del moments_axes[0] 109 | params_shape = inputs_shape[reduction_axis:reduction_axis + 1] 110 | if not params_shape.is_fully_defined(): 111 | raise ValueError('Inputs %s has undefined channels dimension %s.' % ( 112 | inputs.name, params_shape)) 113 | 114 | # Allocate parameters for the beta and gamma of the normalization. 
115 | beta, gamma = None, None 116 | dtype = inputs.dtype.base_dtype 117 | if param_initializers is None: 118 | param_initializers = {} 119 | if center: 120 | beta_collections = utils.get_variable_collections( 121 | variables_collections, 'beta') 122 | beta_initializer = param_initializers.get( 123 | 'beta', init_ops.zeros_initializer()) 124 | beta = variables.model_variable('beta', 125 | shape=params_shape, 126 | dtype=dtype, 127 | initializer=beta_initializer, 128 | collections=beta_collections, 129 | trainable=trainable) 130 | if params_shape_broadcast: 131 | beta = array_ops.reshape(beta, params_shape_broadcast) 132 | if scale: 133 | gamma_collections = utils.get_variable_collections( 134 | variables_collections, 'gamma') 135 | gamma_initializer = param_initializers.get( 136 | 'gamma', init_ops.ones_initializer()) 137 | gamma = variables.model_variable('gamma', 138 | shape=params_shape, 139 | dtype=dtype, 140 | initializer=gamma_initializer, 141 | collections=gamma_collections, 142 | trainable=trainable) 143 | if params_shape_broadcast: 144 | gamma = array_ops.reshape(gamma, params_shape_broadcast) 145 | 146 | if data_format == DATA_FORMAT_NHWC: 147 | inputs = array_ops.transpose(inputs, list(range(1, reduction_axis)) + [0, reduction_axis]) 148 | if data_format == DATA_FORMAT_NCHW: 149 | inputs = array_ops.transpose(inputs, list(range(2, inputs_rank)) + [0, reduction_axis]) 150 | hw, n, c = inputs.shape.as_list()[:-2], inputs.shape[-2].value, inputs.shape[-1].value 151 | inputs = array_ops.reshape(inputs, [1] + hw + [n * c]) 152 | if inputs.shape.ndims != 4: 153 | # combine all the spatial dimensions into only two, e.g. [D, H, W] -> [DH, W] 154 | if inputs.shape.ndims > 4: 155 | inputs_ndims4_shape = [1, hw[0], -1, n * c] 156 | else: 157 | inputs_ndims4_shape = [1, 1, -1, n * c] 158 | inputs = array_ops.reshape(inputs, inputs_ndims4_shape) 159 | beta = array_ops.reshape(array_ops.tile(beta[None, :], [n, 1]), [-1]) 160 | gamma = array_ops.reshape(array_ops.tile(gamma[None, :], [n, 1]), [-1]) 161 | 162 | outputs, _, _ = nn.fused_batch_norm( 163 | inputs, gamma, beta, epsilon=epsilon, 164 | data_format=DATA_FORMAT_NHWC, name='instancenorm') 165 | 166 | outputs = array_ops.reshape(outputs, hw + [n, c]) 167 | if data_format == DATA_FORMAT_NHWC: 168 | outputs = array_ops.transpose(outputs, [inputs_rank - 2] + list(range(inputs_rank - 2)) + [inputs_rank - 1]) 169 | if data_format == DATA_FORMAT_NCHW: 170 | outputs = array_ops.transpose(outputs, [inputs_rank - 2, inputs_rank - 1] + list(range(inputs_rank - 2))) 171 | 172 | # if data_format == DATA_FORMAT_NHWC: 173 | # inputs = array_ops.transpose(inputs, [0, reduction_axis] + list(range(1, reduction_axis))) 174 | # inputs_nchw_shape = inputs.shape 175 | # inputs = array_ops.reshape(inputs, [1, -1] + inputs_nchw_shape.as_list()[2:]) 176 | # if inputs.shape.ndims != 4: 177 | # # combine all the spatial dimensions into only two, e.g. 
[D, H, W] -> [DH, W] 178 | # if inputs.shape.ndims > 4: 179 | # inputs_ndims4_shape = inputs.shape.as_list()[:2] + [-1, inputs_nchw_shape.as_list()[-1]] 180 | # else: 181 | # inputs_ndims4_shape = inputs.shape.as_list()[:2] + [1, -1] 182 | # inputs = array_ops.reshape(inputs, inputs_ndims4_shape) 183 | # beta = array_ops.reshape(array_ops.tile(beta[None, :], [inputs_nchw_shape[0].value, 1]), [-1]) 184 | # gamma = array_ops.reshape(array_ops.tile(gamma[None, :], [inputs_nchw_shape[0].value, 1]), [-1]) 185 | # 186 | # outputs, _, _ = nn.fused_batch_norm( 187 | # inputs, gamma, beta, epsilon=epsilon, 188 | # data_format=DATA_FORMAT_NCHW, name='instancenorm') 189 | # 190 | # outputs = array_ops.reshape(outputs, inputs_nchw_shape) 191 | # if data_format == DATA_FORMAT_NHWC: 192 | # outputs = array_ops.transpose(outputs, [0] + list(range(2, inputs_rank)) + [1]) 193 | 194 | if activation_fn is not None: 195 | outputs = activation_fn(outputs) 196 | return utils.collect_named_outputs(outputs_collections, sc.name, outputs) 197 | -------------------------------------------------------------------------------- /robonet/video_prediction/layers/vgg_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def vgg_assign_from_values_fn(model='vgg16', 6 | var_name_prefix='vgg/', 7 | var_name_kernel_postfix='/kernel:0', 8 | var_name_bias_postfix='/bias:0'): 9 | if model not in ('vgg16', 'vgg19'): 10 | raise ValueError('Invalid model %s' % model) 11 | import h5py 12 | WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/' \ 13 | '%s_weights_tf_dim_ordering_tf_kernels_notop.h5' % model 14 | weights_path = tf.keras.utils.get_file( 15 | '%s_weights_tf_dim_ordering_tf_kernels_notop.h5' % model, 16 | WEIGHTS_PATH_NO_TOP, 17 | cache_subdir='models') 18 | weights_file = h5py.File(weights_path, 'r') 19 | 20 | num_blocks = 5 21 | max_num_convs_in_block = 3 if model == 'vgg16' else 4 22 | 23 | weight_name_kernel_postfix = '_W_1:0' 24 | weight_name_bias_postfix = '_b_1:0' 25 | var_names_to_values = {} 26 | for block_id in range(num_blocks): 27 | for conv_id in range(max_num_convs_in_block): 28 | if block_id < 2 and conv_id >= 2: 29 | continue 30 | name = 'block%d_conv%d' % (block_id + 1, conv_id + 1) 31 | var_names_to_values[var_name_prefix + name + var_name_kernel_postfix] = \ 32 | weights_file[name][name + weight_name_kernel_postfix][()] 33 | var_names_to_values[var_name_prefix + name + var_name_bias_postfix] = \ 34 | weights_file[name][name + weight_name_bias_postfix][()] 35 | return tf.contrib.framework.assign_from_values_fn(var_names_to_values) 36 | 37 | 38 | def vgg16(rgb_image): 39 | """ 40 | rgb_image: 4-D tensor with pixel intensities between 0 and 1. 
41 | """ 42 | bgr_mean = np.array([103.939, 116.779, 123.68], np.float32) 43 | rgb_scaled_image = rgb_image * 255.0 44 | bgr_scaled_image = rgb_scaled_image[:, :, :, ::-1] 45 | bgr_centered_image = bgr_scaled_image - tf.convert_to_tensor(bgr_mean) 46 | 47 | x = bgr_centered_image 48 | tensors = [x] 49 | features = [] 50 | 51 | # Block1 52 | x = tf.layers.conv2d(x, 64, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block1_conv1') 53 | tensors.append(x) 54 | x = tf.layers.conv2d(x, 64, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block1_conv2') 55 | tensors.append(x) 56 | features.append(x) 57 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block1_pool') 58 | tensors.append(x) 59 | 60 | # Block2 61 | x = tf.layers.conv2d(x, 128, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block2_conv1') 62 | tensors.append(x) 63 | x = tf.layers.conv2d(x, 128, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block2_conv2') 64 | tensors.append(x) 65 | features.append(x) 66 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block2_pool') 67 | tensors.append(x) 68 | 69 | # Block3 70 | x = tf.layers.conv2d(x, 256, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block3_conv1') 71 | tensors.append(x) 72 | x = tf.layers.conv2d(x, 256, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block3_conv2') 73 | tensors.append(x) 74 | x = tf.layers.conv2d(x, 256, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block3_conv3') 75 | tensors.append(x) 76 | features.append(x) 77 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block3_pool') 78 | tensors.append(x) 79 | 80 | # Block4 81 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block4_conv1') 82 | tensors.append(x) 83 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block4_conv2') 84 | tensors.append(x) 85 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block4_conv3') 86 | tensors.append(x) 87 | features.append(x) 88 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block4_pool') 89 | tensors.append(x) 90 | 91 | # Block5 92 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block5_conv1') 93 | tensors.append(x) 94 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block5_conv2') 95 | tensors.append(x) 96 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block5_conv3') 97 | tensors.append(x) 98 | features.append(x) 99 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block5_pool') 100 | tensors.append(x) 101 | 102 | return tensors, features 103 | 104 | 105 | def vgg19(rgb_image): 106 | """ 107 | rgb_image: 4-D tensor with pixel intensities between 0 and 1. 
108 | """ 109 | bgr_mean = np.array([103.939, 116.779, 123.68], np.float32) 110 | rgb_scaled_image = rgb_image * 255.0 111 | bgr_scaled_image = rgb_scaled_image[:, :, :, ::-1] 112 | bgr_centered_image = bgr_scaled_image - tf.convert_to_tensor(bgr_mean) 113 | 114 | x = bgr_centered_image 115 | tensors = [x] 116 | features = [] 117 | 118 | # Block1 119 | x = tf.layers.conv2d(x, 64, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block1_conv1') 120 | tensors.append(x) 121 | x = tf.layers.conv2d(x, 64, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block1_conv2') 122 | tensors.append(x) 123 | features.append(x) 124 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block1_pool') 125 | tensors.append(x) 126 | 127 | # Block2 128 | x = tf.layers.conv2d(x, 128, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block2_conv1') 129 | tensors.append(x) 130 | x = tf.layers.conv2d(x, 128, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block2_conv2') 131 | tensors.append(x) 132 | features.append(x) 133 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block2_pool') 134 | tensors.append(x) 135 | 136 | # Block3 137 | x = tf.layers.conv2d(x, 256, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block3_conv1') 138 | tensors.append(x) 139 | x = tf.layers.conv2d(x, 256, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block3_conv2') 140 | tensors.append(x) 141 | x = tf.layers.conv2d(x, 256, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block3_conv3') 142 | tensors.append(x) 143 | x = tf.layers.conv2d(x, 256, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block3_conv4') 144 | tensors.append(x) 145 | features.append(x) 146 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block3_pool') 147 | tensors.append(x) 148 | 149 | # Block4 150 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block4_conv1') 151 | tensors.append(x) 152 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block4_conv2') 153 | tensors.append(x) 154 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block4_conv3') 155 | tensors.append(x) 156 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block4_conv4') 157 | tensors.append(x) 158 | features.append(x) 159 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block4_pool') 160 | tensors.append(x) 161 | 162 | # Block5 163 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block5_conv1') 164 | tensors.append(x) 165 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block5_conv2') 166 | tensors.append(x) 167 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block5_conv3') 168 | tensors.append(x) 169 | x = tf.layers.conv2d(x, 512, (3, 3), padding='same', activation=tf.nn.relu, trainable=False, name='block5_conv4') 170 | tensors.append(x) 171 | features.append(x) 172 | x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), padding='same', name='block5_pool') 173 | tensors.append(x) 174 | 175 | return tensors, features 176 | -------------------------------------------------------------------------------- 
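A hedged usage sketch for the two helpers above (the 'vgg' scope name, placeholder shape, and session handling are illustrative assumptions, not repo API): build the frozen VGG16 tower on images scaled to [0, 1], then copy the downloaded Keras weights into the graph before using the features.

    import tensorflow as tf
    from robonet.video_prediction.layers.vgg_network import vgg16, vgg_assign_from_values_fn

    images = tf.placeholder(tf.float32, [8, 48, 64, 3])        # RGB frames in [0, 1]; shape is an example
    with tf.variable_scope('vgg'):                              # must match var_name_prefix='vgg/'
        _, block_features = vgg16(images)                       # one feature map per conv block
    init_vgg_fn = vgg_assign_from_values_fn(model='vgg16')      # callable that assigns the pretrained weights

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        init_vgg_fn(sess)                                        # overwrite the frozen conv kernels/biases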
/robonet/video_prediction/losses.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from robonet.video_prediction.ops import sigmoid_kl_with_logits 4 | 5 | 6 | def l1_loss(pred, target): 7 | return tf.reduce_mean(tf.abs(target - pred)) 8 | 9 | 10 | def l2_loss(pred, target): 11 | return tf.reduce_mean(tf.square(target - pred)) 12 | 13 | 14 | def gan_loss(logits, labels, gan_loss_type): 15 | # use 1.0 (or 1.0 - discrim_label_smooth) for real data and 0.0 for fake data 16 | if gan_loss_type == 'GAN': 17 | # discrim_loss = tf.reduce_mean(-(tf.log(predict_real + EPS) + tf.log(1 - predict_fake + EPS))) 18 | # gen_loss = tf.reduce_mean(-tf.log(predict_fake + EPS)) 19 | if labels in (0.0, 1.0): 20 | labels = tf.constant(labels, dtype=logits.dtype, shape=logits.get_shape()) 21 | loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)) 22 | else: 23 | loss = tf.reduce_mean(sigmoid_kl_with_logits(logits, labels)) 24 | elif gan_loss_type == 'LSGAN': 25 | # discrim_loss = tf.reduce_mean((tf.square(predict_real - 1) + tf.square(predict_fake))) 26 | # gen_loss = tf.reduce_mean(tf.square(predict_fake - 1)) 27 | loss = tf.reduce_mean(tf.square(logits - labels)) 28 | elif gan_loss_type == 'SNGAN': 29 | # this is the form of the loss used in the official implementation of the SNGAN paper, but it leads to 30 | # worse results in our video prediction experiments 31 | if labels == 0.0: 32 | loss = tf.reduce_mean(tf.nn.softplus(logits)) 33 | elif labels == 1.0: 34 | loss = tf.reduce_mean(tf.nn.softplus(-logits)) 35 | else: 36 | raise NotImplementedError 37 | else: 38 | raise ValueError('Unknown GAN loss type %s' % gan_loss_type) 39 | return loss 40 | 41 | 42 | def kl_loss(mu, log_sigma_sq): 43 | sigma_sq = tf.exp(log_sigma_sq) 44 | return -0.5 * tf.reduce_mean(tf.reduce_sum(1 + log_sigma_sq - tf.square(mu) - sigma_sq, axis=-1)) 45 | -------------------------------------------------------------------------------- /robonet/video_prediction/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .graphs import get_graph_class 2 | 3 | 4 | def get_model(class_name): 5 | if class_name == 'deterministic': 6 | from .deterministic_generator import DeterministicModel 7 | return DeterministicModel 8 | else: 9 | raise NotImplementedError 10 | -------------------------------------------------------------------------------- /robonet/video_prediction/models/base_model.py: -------------------------------------------------------------------------------- 1 | from robonet.video_prediction.models import get_graph_class 2 | from tensorflow.contrib.training import HParams 3 | import itertools 4 | import copy 5 | 6 | 7 | class BaseModel(object): 8 | def __init__(self, data_loader_hparams, num_gpus, graph_type, tpu_mode=False, graph_scope=None): 9 | self._data_hparams = data_loader_hparams 10 | self._num_gpus = num_gpus 11 | self._graph_class = self._get_graph(graph_type) 12 | self._tpu_mode = tpu_mode 13 | if graph_scope is not None: 14 | self._graph_scope = graph_scope 15 | else: 16 | self._graph_scope = self._default_scope() 17 | 18 | def _default_scope(self): 19 | return 'vpred_model' 20 | 21 | def _get_graph(self, graph_type): 22 | return get_graph_class(graph_type) 23 | 24 | def init_default_hparams(self, params): 25 | graph_params = self._graph_class.default_hparams() 26 | model_hparams = self._model_default_hparams() 27 | default_hparams = 
dict(itertools.chain(graph_params.items(), model_hparams.items())) 28 | 29 | params = copy.deepcopy(params) 30 | if self._tpu_mode: 31 | self._summary_dir = params.pop('summary_dir') 32 | self._summary_queue_len = params.pop('summary_queue_len') 33 | self._image_summary_freq = params.pop('image_summary_freq') 34 | 35 | self._use_tpu = params.pop('use_tpu', None) 36 | for k in list(params.keys()): 37 | if k not in default_hparams: 38 | params.pop(k) 39 | print('key {} specified but is not in hparams!'.format(k)) 40 | 41 | self._hparams = HParams(**default_hparams).override_from_dict(params) 42 | self._hparams.use_tpu = self._use_tpu 43 | 44 | def model_fn(self, features, labels, mode, params): 45 | self.init_default_hparams(params) 46 | return self._model_fn(features, labels, mode) 47 | 48 | def _model_default_hparams(self): 49 | raise NotImplementedError 50 | 51 | def _model_fn(self, inputs, targets, mode): 52 | raise NotImplementedError 53 | 54 | @property 55 | def scope_name(self): 56 | return self._graph_scope 57 | 58 | @property 59 | def data_hparams(self): 60 | return copy.deepcopy(self._data_hparams) 61 | 62 | @property 63 | def model_hparams(self): 64 | return copy.deepcopy(self._hparams) -------------------------------------------------------------------------------- /robonet/video_prediction/models/deterministc_embedding_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | TODO: use self._data_hparams instead of hacking batch_size/sub_batch_size into the model_hparams 3 | 4 | Boiled down version of SAVP model from https://github.com/alexlee-gk/video_prediction 5 | """ 6 | import itertools 7 | from robonet.video_prediction.utils import tf_utils 8 | import tensorflow as tf 9 | from robonet.video_prediction.models import get_graph_class 10 | from tensorflow.contrib.training import HParams 11 | import logging 12 | from collections import OrderedDict 13 | from robonet.video_prediction import losses 14 | from robonet.video_prediction.ops import lrelu, dense, pad2d, conv2d, conv_pool2d, flatten, tile_concat, pool2d, get_norm_layer 15 | from tensorflow.python.util import nest 16 | from robonet.video_prediction.layers.encoder_layers import create_n_layer_encoder 17 | 18 | 19 | def onestep_encoder_fn(targets, hparams=None): 20 | image_pairs = tf.concat([targets['images'][:-1], targets['images'][1:]], axis=-1) 21 | 22 | targets = tile_concat([image_pairs, targets['actions'][:-1][:,:, None, None]], axis=-1) 23 | 24 | assert targets.shape.ndims == 5 25 | 26 | batch_shape = targets.shape[:-3].as_list() 27 | targets = flatten(targets, 0, len(batch_shape) - 1) 28 | unflatten = lambda x: tf.reshape(x, batch_shape + x.shape.as_list()[1:]) 29 | outputs = create_n_layer_encoder(targets, stochastic=hparams.stochastic) 30 | return nest.map_structure(unflatten, outputs) 31 | 32 | 33 | def split_model_inference(inputs, targets, params): 34 | """ 35 | we use separate trajectories for the encoder from the ones used for prediction training 36 | :param inputs: dict with tensors in *time-major* 37 | :param targets: dict with tensors in *time-major* 38 | :return: 39 | """ 40 | def split(inputs, bs, sbs): 41 | first_half = {} 42 | second_half = {} 43 | for key, value in inputs.items(): 44 | first_half[key] = [] 45 | second_half[key] = [] 46 | for i in range(bs // sbs): 47 | first_half[key].append(value[:, sbs * i:sbs * i + sbs // 2]) 48 | second_half[key].append(value[:, sbs * i + sbs // 2:sbs * (i + 1)]) 49 | first_half[key] = tf.concat(first_half[key], 1) 50 | 
second_half[key] = tf.concat(second_half[key], 1) 51 | return first_half, second_half 52 | 53 | sbs = params.sub_batch_size 54 | bs = params.batch_size 55 | inputs_train, inputs_inference = split(inputs, bs, sbs) 56 | targets_train, targets_inference = split(targets, bs, sbs) 57 | 58 | return {'train':inputs_train, 'inference':inputs_inference}, \ 59 | {'train':targets_train, 'inference':targets_inference} 60 | 61 | 62 | def average_and_repeat(enc, params, tlen): 63 | """ 64 | :param enc: time, batch, z_dim 65 | :param params: 66 | :param tlen: length of horizon 67 | :return: e in time-major 68 | """ 69 | 70 | enc = tf.reduce_mean(enc, axis=0) # average over time dimension 71 | hsbs = params.sub_batch_size // 2 72 | bs = params.batch_size 73 | e = [] 74 | for i in range(bs // params.sub_batch_size): 75 | averaged = tf.reduce_mean(enc[i*hsbs: (i+1)*hsbs], axis=0) # average over sub-batch dimension 76 | averaged = tf.tile(averaged[None], [hsbs, 1]) # tile across sub-batch 77 | e.append(averaged) 78 | e = tf.concat(e, axis=0) 79 | e = tf.tile(e[None], [tlen, 1, 1]) 80 | return e 81 | 82 | -------------------------------------------------------------------------------- /robonet/video_prediction/models/graphs/__init__.py: -------------------------------------------------------------------------------- 1 | def get_graph_class(class_name): 2 | if class_name == 'c_dna_flow': 3 | from .dnaflow_graph import DNAFlowGraphWrapper 4 | return DNAFlowGraphWrapper 5 | elif class_name == 'deterministic_graph': 6 | from .deterministic_graph import DeterministicWrapper 7 | return DeterministicWrapper 8 | elif class_name == 'vgg_conv': 9 | from .vgg_conv_graph import VGGConvGraph 10 | return VGGConvGraph 11 | else: 12 | raise NotImplementedError 13 | -------------------------------------------------------------------------------- /robonet/video_prediction/models/graphs/base_graph.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class BaseGraph: 5 | def build_graph(self, inputs, hparams, n_gpus=1, scope_name='graph'): 6 | raise NotImplementedError 7 | 8 | @staticmethod 9 | def default_hparams(): 10 | return { 11 | 'sequence_length': 15, 12 | 'context_frames': 2, 13 | 'use_states': False 14 | } 15 | 16 | @property 17 | def vars(self): 18 | return tf.trainable_variables(self._scope_name) 19 | -------------------------------------------------------------------------------- /robonet/video_prediction/models/graphs/deterministic_graph.py: -------------------------------------------------------------------------------- 1 | from .base_graph import BaseGraph 2 | from robonet.video_prediction.layers.dnaflow_rnn_cell import VPredCell 3 | import itertools 4 | import tensorflow as tf 5 | from robonet.video_prediction.utils import tf_utils 6 | 7 | from robonet.video_prediction.layers.deterministic_embedding_rnn_cell import DetVPredCell 8 | import pdb 9 | 10 | class DeterministicWrapper(BaseGraph): 11 | def build_graph(self, mode, inputs, hparams, n_gpus=1, scope_name='dnaflow_generator'): 12 | if hparams.use_states: 13 | assert "states" in inputs, "graph is building with states but no states in inptus" 14 | else: 15 | inputs.pop('states', None) 16 | outputs_enc = inputs.pop('outputs_enc', None) 17 | 18 | self._scope_name = scope_name 19 | with tf.variable_scope(self._scope_name) as graph_scope: 20 | # TODO: I really don't like this. 
Should just error at this point instead of padding 21 | inputs = {name: tf_utils.maybe_pad_or_slice(input, hparams.sequence_length - 1) 22 | for name, input in inputs.items()} 23 | 24 | if outputs_enc is not None: 25 | inputs['e'] = outputs_enc 26 | 27 | cell = DetVPredCell(mode, inputs, hparams) 28 | outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32, 29 | swap_memory=False, time_major=True) 30 | 31 | outputs = {name: output[hparams.context_frames - 1:] for name, output in outputs.items()} 32 | outputs['ground_truth_sampling_mean'] = tf.reduce_mean(tf.to_float(cell.ground_truth[hparams.context_frames:])) 33 | return outputs 34 | 35 | @property 36 | def vars(self): 37 | return tf.trainable_variables(self._scope_name) 38 | 39 | @staticmethod 40 | def default_hparams(): 41 | default_params = { 42 | "where_add": "all", 43 | 'last_frames': 2, 44 | 'num_transformed_images': 4, 45 | 'prev_image_background': True, 46 | 'first_image_background': True, 47 | 'context_images_background': False, 48 | 'generate_scratch_image': False, 49 | 'transformation': "flow", 50 | 'conv_rnn': "lstm", 51 | 'norm_layer': "instance", 52 | 'ablation_conv_rnn_norm': False, 53 | 'downsample_layer': "conv_pool2d", 54 | 'upsample_layer': "upsample_conv2d", 55 | 'dependent_mask': True, 56 | 'c_dna_kernel_size': [5, 5], # only used in CDNA/DNA mode 57 | 58 | 'schedule_sampling': "inverse_sigmoid", 59 | 'schedule_sampling_k': 900.0, 60 | 'schedule_sampling_steps': [0, 100000], 61 | 62 | 'renormalize_pixdistrib': True, 63 | 64 | 'e_dim': None, # gets populated inside in deterministic_embedding_generator.py 65 | 'sub_batch_size': None, # gets poplated from dataset_hparam 66 | 'batch_size': None, # gets poplated from dataset_hparam 67 | 'encoder': None, 68 | 'stochastic': False, 69 | 70 | # params below control size of model 71 | 'ngf': 32, 72 | 'encoder_layer_size_mult': [1, 2, 4], 73 | 'encoder_layer_use_rnn': [True, True, True], 74 | 'decoder_layer_size_mult': [2, 1, 1], 75 | 'decoder_layer_use_rnn': [True, True, False] 76 | } 77 | return dict(itertools.chain(BaseGraph.default_hparams().items(), default_params.items())) 78 | -------------------------------------------------------------------------------- /robonet/video_prediction/models/graphs/dnaflow_graph.py: -------------------------------------------------------------------------------- 1 | from .base_graph import BaseGraph 2 | from robonet.video_prediction.layers.dnaflow_rnn_cell import VPredCell 3 | import itertools 4 | import tensorflow as tf 5 | from robonet.video_prediction.utils import tf_utils 6 | 7 | 8 | class DNAFlowGraphWrapper(BaseGraph): 9 | def build_graph(self, mode, inputs, hparams, n_gpus=1, scope_name='dnaflow_generator'): 10 | if hparams.use_states: 11 | assert "states" in inputs, "graph is building with states but no states in inptus" 12 | else: 13 | inputs.pop('states', None) 14 | 15 | self._scope_name = scope_name 16 | outputs_enc = inputs.pop('outputs_enc', None) 17 | with tf.variable_scope(self._scope_name) as graph_scope: 18 | # TODO: I really don't like this. 
Should just error at this point instead of padding 19 | inputs = {name: tf_utils.maybe_pad_or_slice(input, hparams.sequence_length - 1) 20 | for name, input in inputs.items()} 21 | 22 | if outputs_enc is not None: 23 | inputs['e'] = outputs_enc 24 | 25 | cell = VPredCell(mode, inputs, hparams) 26 | outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs['actions'].dtype, 27 | swap_memory=False, time_major=True) 28 | 29 | outputs = {name: output[hparams.context_frames - 1:] for name, output in outputs.items()} 30 | outputs['ground_truth_sampling_mean'] = tf.reduce_mean(tf.to_float(cell.ground_truth[hparams.context_frames:])) 31 | return outputs 32 | 33 | @staticmethod 34 | def default_hparams(): 35 | default_params = { 36 | "where_add": "all", 37 | 'last_frames': 2, 38 | 'num_transformed_images': 4, 39 | 'prev_image_background': True, 40 | 'first_image_background': True, 41 | 'context_images_background': False, 42 | 'generate_scratch_image': False, 43 | 'transformation': "flow", 44 | 'conv_rnn': "lstm", 45 | 'norm_layer': "instance", 46 | 'ablation_conv_rnn_norm': False, 47 | 'downsample_layer': "conv_pool2d", 48 | 'upsample_layer': "upsample_conv2d", 49 | 'dependent_mask': True, 50 | 'c_dna_kernel_size': [5, 5], # only used in CDNA/DNA mode 51 | 52 | 'schedule_sampling': "inverse_sigmoid", 53 | 'schedule_sampling_k': 900.0, 54 | 'schedule_sampling_steps': [0, 100000], 55 | 56 | 'renormalize_pixdistrib': True, 57 | 58 | # params below control size of model 59 | 'ngf': 32, 60 | 'encoder_layer_size_mult': [1, 2, 4], 61 | 'encoder_layer_use_rnn': [True, True, True], 62 | 'decoder_layer_size_mult': [2, 1, 1], 63 | 'decoder_layer_use_rnn': [True, True, False] 64 | } 65 | return dict(itertools.chain(BaseGraph.default_hparams().items(), default_params.items())) 66 | -------------------------------------------------------------------------------- /robonet/video_prediction/rnn_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Convolutional LSTM implementation.""" 17 | 18 | import tensorflow as tf 19 | from tensorflow.python.framework import dtypes 20 | from tensorflow.python.framework import tensor_shape 21 | from tensorflow.python.ops import array_ops 22 | from tensorflow.python.ops import init_ops 23 | from tensorflow.python.ops import math_ops 24 | from tensorflow.python.ops import nn_ops 25 | from tensorflow.python.ops import rnn_cell_impl 26 | from tensorflow.python.ops import variable_scope as vs 27 | 28 | 29 | class BasicConv2DLSTMCell(rnn_cell_impl.RNNCell): 30 | """2D Convolutional LSTM cell with (optional) normalization and recurrent dropout. 31 | 32 | The implementation is based on: tf.contrib.rnn.LayerNormBasicLSTMCell. 
33 | 34 | It does not allow cell clipping, a projection layer, and does not 35 | use peep-hole connections: it is the basic baseline. 36 | """ 37 | def __init__(self, input_shape, filters, kernel_size, 38 | forget_bias=1.0, activation_fn=math_ops.tanh, 39 | normalizer_fn=None, separate_norms=True, 40 | norm_gain=1.0, norm_shift=0.0, 41 | dropout_keep_prob=1.0, dropout_prob_seed=None, 42 | skip_connection=False, reuse=None): 43 | """Initializes the basic convolutional LSTM cell. 44 | 45 | Args: 46 | input_shape: int tuple, Shape of the input, excluding the batch size. 47 | filters: int, The number of filters of the conv LSTM cell. 48 | kernel_size: int tuple, The kernel size of the conv LSTM cell. 49 | forget_bias: float, The bias added to forget gates (see above). 50 | activation_fn: Activation function of the inner states. 51 | normalizer_fn: If specified, this normalization will be applied before the 52 | internal nonlinearities. 53 | separate_norms: If set to `False`, the normalizer_fn is applied to the 54 | concatenated tensor that follows the convolution, i.e. before splitting 55 | the tensor. This case is slightly faster but it might be functionally 56 | different, depending on the normalizer_fn (it's functionally the same 57 | for instance norm but not for layer norm). Default: `True`. 58 | norm_gain: float, The layer normalization gain initial value. If 59 | `normalizer_fn` is `None`, this argument will be ignored. 60 | norm_shift: float, The layer normalization shift initial value. If 61 | `normalizer_fn` is `None`, this argument will be ignored. 62 | dropout_keep_prob: unit Tensor or float between 0 and 1 representing the 63 | recurrent dropout probability value. If float and 1.0, no dropout will 64 | be applied. 65 | dropout_prob_seed: (optional) integer, the randomness seed. 66 | skip_connection: If set to `True`, concatenate the input to the 67 | output of the conv LSTM. Default: `False`. 68 | reuse: (optional) Python boolean describing whether to reuse variables 69 | in an existing scope. If not `True`, and the existing scope already has 70 | the given variables, an error is raised. 
71 | """ 72 | super(BasicConv2DLSTMCell, self).__init__(_reuse=reuse) 73 | 74 | self._input_shape = input_shape 75 | self._filters = filters 76 | self._kernel_size = list(kernel_size) if isinstance(kernel_size, (tuple, list)) else [kernel_size] * 2 77 | self._forget_bias = forget_bias 78 | self._activation_fn = activation_fn 79 | self._normalizer_fn = normalizer_fn 80 | self._separate_norms = separate_norms 81 | self._g = norm_gain 82 | self._b = norm_shift 83 | self._keep_prob = dropout_keep_prob 84 | self._seed = dropout_prob_seed 85 | self._skip_connection = skip_connection 86 | self._reuse = reuse 87 | 88 | if self._skip_connection: 89 | output_channels = self._filters + self._input_shape[-1] 90 | else: 91 | output_channels = self._filters 92 | cell_size = tensor_shape.TensorShape(self._input_shape[:-1] + [self._filters]) 93 | self._output_size = tensor_shape.TensorShape(self._input_shape[:-1] + [output_channels]) 94 | self._state_size = rnn_cell_impl.LSTMStateTuple(cell_size, self._output_size) 95 | 96 | @property 97 | def output_size(self): 98 | return self._output_size 99 | 100 | @property 101 | def state_size(self): 102 | return self._state_size 103 | 104 | def _norm(self, inputs, scope): 105 | shape = inputs.get_shape()[-1:] 106 | gamma_init = init_ops.constant_initializer(self._g) 107 | beta_init = init_ops.constant_initializer(self._b) 108 | with vs.variable_scope(scope): 109 | # Initialize beta and gamma for use by normalizer. 110 | vs.get_variable("gamma", shape=shape, initializer=gamma_init) 111 | vs.get_variable("beta", shape=shape, initializer=beta_init) 112 | normalized = self._normalizer_fn(inputs, reuse=True, scope=scope) 113 | return normalized 114 | 115 | def _conv2d(self, inputs): 116 | output_filters = 4 * self._filters 117 | input_shape = inputs.get_shape().as_list() 118 | kernel_shape = list(self._kernel_size) + [input_shape[-1], output_filters] 119 | kernel = vs.get_variable("kernel", kernel_shape, dtype=dtypes.float32, 120 | initializer=init_ops.truncated_normal_initializer(stddev=0.02)) 121 | outputs = nn_ops.conv2d(inputs, kernel, [1] * 4, padding='SAME') 122 | if not self._normalizer_fn: 123 | bias = vs.get_variable('bias', [output_filters], dtype=dtypes.float32, 124 | initializer=init_ops.zeros_initializer()) 125 | outputs = nn_ops.bias_add(outputs, bias) 126 | return outputs 127 | 128 | def call(self, inputs, state): 129 | """2D Convolutional LSTM cell with (optional) normalization and recurrent dropout.""" 130 | c, h = state 131 | args = array_ops.concat([inputs, h], -1) 132 | concat = self._conv2d(args) 133 | 134 | if self._normalizer_fn and not self._separate_norms: 135 | concat = self._norm(concat, "input_transform_forget_output") 136 | i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=-1) 137 | if self._normalizer_fn and self._separate_norms: 138 | i = self._norm(i, "input") 139 | j = self._norm(j, "transform") 140 | f = self._norm(f, "forget") 141 | o = self._norm(o, "output") 142 | 143 | g = self._activation_fn(j) 144 | if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1: 145 | g = nn_ops.dropout(g, self._keep_prob, seed=self._seed) 146 | 147 | new_c = (c * math_ops.sigmoid(f + self._forget_bias) 148 | + math_ops.sigmoid(i) * g) 149 | if self._normalizer_fn: 150 | new_c = self._norm(new_c, "state") 151 | new_h = self._activation_fn(new_c) * math_ops.sigmoid(o) 152 | 153 | if self._skip_connection: 154 | new_h = array_ops.concat([new_h, inputs], axis=-1) 155 | 156 | new_state = rnn_cell_impl.LSTMStateTuple(new_c, 
new_h) 157 | return new_h, new_state 158 | 159 | 160 | class Conv2DGRUCell(tf.nn.rnn_cell.RNNCell): 161 | """2D Convolutional GRU cell with (optional) normalization. 162 | 163 | Modified from these: 164 | https://github.com/carlthome/tensorflow-convlstm-cell/blob/master/cell.py 165 | https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/rnn_cell_impl.py 166 | """ 167 | def __init__(self, input_shape, filters, kernel_size, 168 | activation_fn=tf.tanh, 169 | normalizer_fn=None, separate_norms=True, 170 | bias_initializer=None, reuse=None): 171 | super(Conv2DGRUCell, self).__init__(_reuse=reuse) 172 | self._input_shape = input_shape 173 | self._filters = filters 174 | self._kernel_size = list(kernel_size) if isinstance(kernel_size, (tuple, list)) else [kernel_size] * 2 175 | self._activation_fn = activation_fn 176 | self._normalizer_fn = normalizer_fn 177 | self._separate_norms = separate_norms 178 | self._bias_initializer = bias_initializer 179 | self._size = tensor_shape.TensorShape(self._input_shape[:-1] + [self._filters]) 180 | 181 | @property 182 | def state_size(self): 183 | return self._size 184 | 185 | @property 186 | def output_size(self): 187 | return self._size 188 | 189 | def _norm(self, inputs, scope, bias_initializer): 190 | shape = inputs.get_shape()[-1:] 191 | gamma_init = init_ops.ones_initializer() 192 | beta_init = bias_initializer 193 | with vs.variable_scope(scope): 194 | # Initialize beta and gamma for use by normalizer. 195 | vs.get_variable("gamma", shape=shape, initializer=gamma_init) 196 | vs.get_variable("beta", shape=shape, initializer=beta_init) 197 | normalized = self._normalizer_fn(inputs, reuse=True, scope=scope) 198 | return normalized 199 | 200 | def _conv2d(self, inputs, output_filters, bias_initializer): 201 | input_shape = inputs.get_shape().as_list() 202 | kernel_shape = list(self._kernel_size) + [input_shape[-1], output_filters] 203 | kernel = vs.get_variable("kernel", kernel_shape, dtype=dtypes.float32, 204 | initializer=init_ops.truncated_normal_initializer(stddev=0.02)) 205 | outputs = nn_ops.conv2d(inputs, kernel, [1] * 4, padding='SAME') 206 | if not self._normalizer_fn: 207 | bias = vs.get_variable('bias', [output_filters], dtype=dtypes.float32, 208 | initializer=bias_initializer) 209 | outputs = nn_ops.bias_add(outputs, bias) 210 | return outputs 211 | 212 | def call(self, inputs, state): 213 | bias_ones = self._bias_initializer 214 | if self._bias_initializer is None: 215 | bias_ones = init_ops.ones_initializer() 216 | with vs.variable_scope('gates'): 217 | inputs = array_ops.concat([inputs, state], axis=-1) 218 | concat = self._conv2d(inputs, 2 * self._filters, bias_ones) 219 | if self._normalizer_fn and not self._separate_norms: 220 | concat = self._norm(concat, "reset_update", bias_ones) 221 | r, u = array_ops.split(concat, 2, axis=-1) 222 | if self._normalizer_fn and self._separate_norms: 223 | r = self._norm(r, "reset", bias_ones) 224 | u = self._norm(u, "update", bias_ones) 225 | r, u = math_ops.sigmoid(r), math_ops.sigmoid(u) 226 | 227 | bias_zeros = self._bias_initializer 228 | if self._bias_initializer is None: 229 | bias_zeros = init_ops.zeros_initializer() 230 | with vs.variable_scope('candidate'): 231 | inputs = array_ops.concat([inputs, r * state], axis=-1) 232 | candidate = self._conv2d(inputs, self._filters, bias_zeros) 233 | if self._normalizer_fn: 234 | candidate = self._norm(candidate, "state", bias_zeros) 235 | 236 | c = self._activation_fn(candidate) 237 | new_h = u * state + (1 - u) * c 238 | return new_h, 
new_h 239 | -------------------------------------------------------------------------------- /robonet/video_prediction/testing/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_evaluation_interface import VPredEvaluation 2 | -------------------------------------------------------------------------------- /robonet/video_prediction/testing/model_evaluation_interface.py: -------------------------------------------------------------------------------- 1 | import ray 2 | from robonet.video_prediction.models import get_model 3 | import numpy as np 4 | import yaml 5 | from robonet.video_prediction.utils import tf_utils 6 | import tensorflow as tf 7 | from tensorflow.contrib.training import HParams 8 | import os 9 | import math 10 | import re 11 | import glob 12 | 13 | 14 | class VPredEvaluation(object): 15 | def __init__(self, model_path, test_hparams={}, n_gpus=1, first_gpu=0, sess=None): 16 | assert first_gpu == 0, "only starts building at gpu0" 17 | 18 | self._test_hparams = self._default_hparams().override_from_dict(test_hparams) 19 | self._model_path = os.path.expanduser(model_path) 20 | 21 | config_path = self._model_path + '/params.yaml' 22 | assert os.path.exists(config_path), 'Config path does not exist!' 23 | 24 | with open(config_path) as config: 25 | params = yaml.load(config, Loader=yaml.SafeLoader) 26 | self._model_hparams = params['model'] 27 | self._input_hparams = params['dataset'] 28 | 29 | print('\n\n------------------------------------ LOADED PARAMS ------------------------------------') 30 | for k, v in self._model_hparams.items(): 31 | print('{} --> {}'.format(k, v)) 32 | for k, v in self._input_hparams.items(): 33 | print('{} --> {}'.format(k, v)) 34 | print('---------------------------------------------------------------------------------------\n\n') 35 | 36 | PredictionModel = get_model(self._model_hparams.pop('model')) 37 | self._model = PredictionModel(self._input_hparams, n_gpus, self._model_hparams.pop('graph_type'), False, self._model_hparams.pop('scope_name')) 38 | self._outputs = self._model.model_fn(self._build_inputs(), {}, tf.estimator.ModeKeys.PREDICT, self._model_hparams) 39 | 40 | self._sess = sess 41 | self._restored = False 42 | 43 | def _default_hparams(self): 44 | default_dict = { 45 | "run_batch_size": 200, 46 | 'tile_context': True, 47 | 'designated_pixel_count': 0 48 | } 49 | return HParams(**default_dict) 50 | 51 | def _build_inputs(self): 52 | B_pl = self._test_hparams.run_batch_size 53 | if self._test_hparams.tile_context: 54 | B_pl = 1 55 | 56 | context_frames = self._model_hparams['context_frames'] 57 | assert context_frames > 1, "needs at least 1 context action (so 2 frames)" 58 | 59 | input_length = self._model_hparams['sequence_length'] - 1 60 | pad_len = input_length - context_frames 61 | 62 | height, width = self._input_hparams['img_size'] 63 | self._images_pl = tf.placeholder(tf.float32, [B_pl, context_frames, height, width, 3]) 64 | self._states_pl = tf.placeholder(tf.float32, [B_pl, context_frames, self._input_hparams['target_sdim']]) 65 | self._context_actions_pl = tf.placeholder(tf.float32, [B_pl, context_frames - 1, self._input_hparams['target_adim']]) 66 | self._actions_pl = tf.placeholder(tf.float32, [self._test_hparams.run_batch_size, pad_len + 1, self._input_hparams['target_adim']]) 67 | 68 | if self._test_hparams.designated_pixel_count: 69 | self._pixel_dist_pl = tf.placeholder(tf.float32, [B_pl, context_frames, height, width, self._test_hparams.designated_pixel_count]) 70 
| pad = tf.zeros((B_pl, pad_len, height, width, self._test_hparams.designated_pixel_count), dtype=tf.float32) 71 | input_pixel_distributions = tf.concat((self._pixel_dist_pl, pad), axis=1) 72 | if self._test_hparams.tile_context: 73 | input_pixel_distributions = tf.tile(input_pixel_distributions, [self._test_hparams.run_batch_size, 1, 1, 1, 1]) 74 | 75 | input_imgs = tf.concat((self._images_pl, tf.zeros((B_pl, pad_len, height, width, 3), dtype=tf.float32)), axis=1) 76 | input_states = tf.concat((self._states_pl, tf.zeros((B_pl, pad_len, self._input_hparams['target_sdim']), dtype=tf.float32)), axis=1) 77 | if self._test_hparams.tile_context: 78 | input_states, context_actions = [tf.tile(tensor, [self._test_hparams.run_batch_size, 1, 1]) for tensor in [input_states, self._context_actions_pl]] 79 | input_imgs = tf.tile(input_imgs, [self._test_hparams.run_batch_size, 1, 1, 1, 1]) 80 | else: 81 | context_actions = self._context_actions_pl 82 | 83 | input_actions = tf.concat((context_actions, self._actions_pl), axis=1) 84 | 85 | ret_dict = {'actions': input_actions, 'images': input_imgs, 'states': input_states} 86 | if self._test_hparams.designated_pixel_count: 87 | ret_dict['pixel_distributions'] = input_pixel_distributions 88 | return ret_dict 89 | 90 | def predict(self, context_tensors, action_tensors): 91 | # assert self._restored, "must restore before testing can continue!" 92 | 93 | if self._test_hparams.tile_context: 94 | assert context_tensors['context_frames'].shape[1] == 1, "only one camera supported!" 95 | context_images = context_tensors['context_frames'][-self._model_hparams['context_frames']:, 0][None] 96 | context_actions = context_tensors['context_actions'][(1 - self._model_hparams['context_frames']):][None] 97 | context_states = context_tensors['context_states'][-self._model_hparams['context_frames']:][None] 98 | else: 99 | assert context_tensors['context_frames'].shape[2] == 1, "only one camera supported!" 
100 | context_images = context_tensors['context_frames'][:, -self._model_hparams['context_frames']:, 0] 101 | context_actions = context_tensors['context_actions'][:, (1 - self._model_hparams['context_frames']):] 102 | context_states = context_tensors['context_states'][:, -self._model_hparams['context_frames']:] 103 | 104 | if self._test_hparams.designated_pixel_count and self._test_hparams.tile_context: 105 | context_distributions = context_tensors['context_pixel_distributions'][-self._model_hparams['context_frames']:, 0][None] 106 | elif self._test_hparams.designated_pixel_count: 107 | context_distributions = context_tensors['context_pixel_distributions'][:, -self._model_hparams['context_frames']:, 0] 108 | else: 109 | context_distributions = None 110 | 111 | input_actions = action_tensors['actions'] 112 | n_runs = int(math.ceil(input_actions.shape[0] / float(self._test_hparams.run_batch_size))) 113 | assert n_runs 114 | 115 | ret_dict = None 116 | for n in range(n_runs): 117 | selected_actions = input_actions[n * self._test_hparams.run_batch_size :(n + 1) * self._test_hparams.run_batch_size] 118 | if selected_actions.shape[0] < self._test_hparams.run_batch_size: 119 | pad = np.zeros((self._test_hparams.run_batch_size - selected_actions.shape[0], selected_actions.shape[1], selected_actions.shape[2])) 120 | padded_actions = np.concatenate((selected_actions, pad), axis=0) 121 | else: 122 | padded_actions = selected_actions 123 | 124 | run_t = self._feed(context_images, context_actions, context_states, context_distributions, padded_actions) 125 | 126 | for k in run_t.keys(): 127 | run_t[k] = run_t[k][:selected_actions.shape[0]] 128 | 129 | if ret_dict is None: 130 | ret_dict = run_t 131 | else: 132 | for k, v in run_t.items(): 133 | ret_dict[k] = np.concatenate((ret_dict[k], v), axis=0) 134 | return ret_dict 135 | 136 | def _feed(self, context_images, context_actions, context_states, context_distributions, input_actions): 137 | if context_images.dtype == np.uint8: 138 | context_images = context_images.astype(np.float32) / 255 139 | 140 | feed_dict = {self._images_pl: context_images, 141 | self._states_pl: context_states, 142 | self._context_actions_pl: context_actions, 143 | self._actions_pl: input_actions} 144 | 145 | if self._test_hparams.designated_pixel_count and context_distributions is None: 146 | height, width = self._input_hparams['img_size'] 147 | context_distributions = np.zeros((self._test_hparams.batch_size, self._model_hparams['context_frames'], 148 | height, width, self._test_hparams.designated_pixel_count), dtype=np.float32) 149 | context_distributions[:, :, 0, 0] = 1.0 150 | feed_dict[self._pixel_dist_pl] = context_distributions 151 | elif self._test_hparams.designated_pixel_count: 152 | feed_dict[self._pixel_dist_pl] = context_distributions 153 | 154 | return self._sess.run(self._outputs, feed_dict=feed_dict) 155 | 156 | def __call__(self, context_tensors, action_tensors): 157 | return self.predict(context_tensors, action_tensors) 158 | 159 | def set_session(self, sess): 160 | self._sess = sess 161 | 162 | def restore(self): 163 | if self._restored: 164 | return 165 | 166 | if self._sess is None: 167 | self._sess = tf.Session() 168 | self._sess.run(tf.global_variables_initializer()) 169 | 170 | model_paths = glob.glob('{}/model*'.format(self._model_path)) 171 | assert model_paths, "models not found in {}!".format(self._model_path) 172 | max_model = max([max(re.findall('\d+', m)) for m in model_paths]) 173 | meta_file = [m for m in model_paths if '.meta' in m and 
str(max_model) in m][0] 174 | restore_path = meta_file[:meta_file.find('.meta')] 175 | print('restoring', restore_path) 176 | 177 | checkpoints = [restore_path] 178 | # automatically skip global_step if more than one checkpoint is provided 179 | skip_global_step = len(checkpoints) > 1 180 | savers = [] 181 | for checkpoint in checkpoints: 182 | print("creating restore saver from checkpoint %s" % checkpoint) 183 | saver, _ = tf_utils.get_checkpoint_restore_saver(checkpoint, skip_global_step=skip_global_step) 184 | savers.append(saver) 185 | restore_op = [saver.saver_def.restore_op_name for saver in savers] 186 | self._sess.run(restore_op) 187 | self._restored = True 188 | 189 | @property 190 | def sequence_length(self): 191 | return self._model_hparams['sequence_length'] 192 | 193 | @property 194 | def n_context(self): 195 | return self._model_hparams['context_frames'] 196 | 197 | @property 198 | def horizon(self): 199 | return self.sequence_length - self.n_context 200 | 201 | @property 202 | def n_cam(self): 203 | return 1 204 | 205 | @property 206 | def img_size(self): 207 | return self._input_hparams['img_size'] 208 | 209 | @property 210 | def adim(self): 211 | return self._input_hparams['target_adim'] 212 | 213 | @property 214 | def sdim(self): 215 | return self._input_hparams['target_sdim'] 216 | -------------------------------------------------------------------------------- /robonet/video_prediction/training/__init__.py: -------------------------------------------------------------------------------- 1 | from .ray_util.gif_logger import GIFLogger 2 | 3 | 4 | def get_trainable(class_name): 5 | if class_name == 'VPredTrainable': 6 | from .trainable_interface import VPredTrainable 7 | return VPredTrainable 8 | if class_name == 'BalancedCamFilter': 9 | from .data_filter import BalancedCamFilter 10 | return BalancedCamFilter 11 | if class_name == 'RobotSetFilter': 12 | from .data_filter import RobotSetFilter 13 | return RobotSetFilter 14 | if class_name == 'RobotObjectFilter': 15 | from .data_filter import RobotObjectFilter 16 | return RobotObjectFilter 17 | if class_name == 'BatchmixFinetuning': 18 | from .finetuning_trainable_interface import BatchmixingVPredTrainable 19 | return BatchmixingVPredTrainable 20 | raise NotImplementedError 21 | -------------------------------------------------------------------------------- /robonet/video_prediction/training/data_filter.py: -------------------------------------------------------------------------------- 1 | from robonet.video_prediction.training.trainable_interface import VPredTrainable 2 | import numpy as np 3 | 4 | 5 | class BalancedCamFilter(VPredTrainable): 6 | 7 | def _default_hparams(self): 8 | params = super()._default_hparams() 9 | params.add_hparam('balanced_camera_configurations', True) 10 | return params 11 | 12 | def _filter_metadata(self, metadata): 13 | metadata = super()._filter_metadata(metadata) 14 | 15 | if self._hparams.balanced_camera_configurations: 16 | assert self.dataset_hparams.get('sub_batch_size', 1) > 1 17 | unique_cameras = metadata['camera_configuration'].frame.unique().tolist() # all camera configs that are in he dataset 18 | all_metadata = metadata 19 | metadata = [all_metadata[all_metadata['camera_configuration'] == r] for r in unique_cameras] 20 | 21 | # print('sizes after splitting metadata in camera configurations') 22 | # for m, cam in zip(metadata, unique_cameras): 23 | # print('cam {} : numfiles {} robots: {}'.format(cam, len(m.files), m['robot'].frame.unique().tolist())) 24 | return metadata 25 | 26 | 
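# BalancedCamFilter above returns a list of metadata frames (one per unique 'camera_configuration')
# rather than a single frame; presumably the trainable builds one data source per entry so each batch
# draws its sub-batches evenly across camera setups, which is why sub_batch_size > 1 is asserted.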
27 | class RobotSetFilter(VPredTrainable): 28 | 29 | def _default_hparams(self): 30 | params = super()._default_hparams() 31 | params.add_hparam('robot_set', ['sawyer', 'widowx', 'R3', 'franka']) 32 | return params 33 | 34 | def _filter_metadata(self, metadata_list): 35 | metadata_list = super()._filter_metadata(metadata_list) 36 | 37 | assert self._hparams.balance_across_robots, "need to balance accross robots!" 38 | if self._hparams.robot_set is not None: 39 | 40 | new_metadata_list = [] 41 | for m in metadata_list: 42 | if m['robot'].frame.unique().tolist()[0] in self._hparams.robot_set: 43 | print('using robot', m['robot'].frame.unique().tolist()) 44 | new_metadata_list.append(m) 45 | return new_metadata_list 46 | 47 | 48 | class RobotObjectFilter(VPredTrainable): 49 | def _default_hparams(self): 50 | params = super()._default_hparams() 51 | params.add_hparam('target_robot', '') 52 | params.add_hparam('removed_object', '') 53 | return params 54 | 55 | def _filter_metadata(self, metadata): 56 | obj_exclude = metadata['object_classes'].frame.apply(lambda x: self._hparams.removed_object not in x) 57 | not_robot_applied_to = metadata['robot'] != self._hparams.target_robot 58 | x = metadata[np.logical_or(obj_exclude, not_robot_applied_to)] 59 | return x 60 | -------------------------------------------------------------------------------- /robonet/video_prediction/training/finetuning_trainable_interface.py: -------------------------------------------------------------------------------- 1 | 2 | import glob 3 | import pdb 4 | 5 | from robonet.video_prediction.training.trainable_interface import VPredTrainable 6 | from robonet.datasets import get_dataset_class, load_metadata 7 | 8 | class BatchmixingVPredTrainable(VPredTrainable): 9 | 10 | def _default_hparams(self): 11 | params = super()._default_hparams() 12 | params.add_hparam('robot_set', ['sawyer', 'widowx', 'R3', 'franka']) 13 | return params 14 | 15 | def make_dataloaders(self, config): 16 | DatasetClass = get_dataset_class(self.dataset_hparams.pop('dataset')) 17 | 18 | # data from new domain 19 | new_domain_metadata = self._filter_metadata(load_metadata(config['data_directory'])) 20 | 21 | # data from old domain 22 | old_domain_metadata = self._filter_metadata(load_metadata(config['batchmix_basedata'])) 23 | 24 | old_metadata_list = [] 25 | for m in old_domain_metadata: 26 | if m['robot'].frame.unique().tolist()[0] in self._hparams.robot_set: 27 | print('using robot', m['robot'].frame.unique().tolist()) 28 | old_metadata_list.append(m) 29 | 30 | assert len(new_domain_metadata) == 1 31 | metadata_list = new_domain_metadata*len(old_metadata_list) + old_metadata_list # make sure that we're using the same amount of data from old and new 32 | 33 | return self._get_input_targets(DatasetClass, metadata_list, self.dataset_hparams) 34 | 35 | -------------------------------------------------------------------------------- /robonet/video_prediction/training/ray_util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SudeepDasari/RoboNet/d83eee20f39653c3f8e7c349df7350e8a9e9f7a7/robonet/video_prediction/training/ray_util/__init__.py -------------------------------------------------------------------------------- /robonet/video_prediction/training/ray_util/gif_logger.py: -------------------------------------------------------------------------------- 1 | import ray.cloudpickle as cloudpickle 2 | from ray.tune.logger import Logger 3 | import numpy as np 4 | import os 5 | 
import pickle as pkl 6 | from robonet.video_prediction.utils.ffmpeg_gif import encode_gif 7 | from robonet.video_prediction.utils.encode_img import construct_image_tile 8 | 9 | 10 | class GIFLogger(Logger): 11 | def _init(self): 12 | self._save_dir = os.path.join(self.logdir, 'metrics') 13 | if not os.path.exists(self._save_dir): 14 | os.makedirs(self._save_dir) 15 | 16 | self._metric_file = os.path.join(self._save_dir, 'metric_summaries.pkl') 17 | if os.path.exists(self._metric_file): 18 | self._metric_logs = pkl.load(open(self._metric_file, 'rb')) 19 | else: 20 | self._metric_logs = {} 21 | self._image_logs = {} 22 | 23 | def flush(self): 24 | with open(self._metric_file, 'wb') as f: 25 | cloudpickle.dump(self._metric_logs, f) 26 | 27 | if self._image_logs: 28 | img_dir = os.path.join(self._save_dir, 'images') 29 | if not os.path.exists(img_dir): 30 | os.makedirs(img_dir) 31 | for metric_name, summaries in self._image_logs.items(): 32 | for step, encoding_type, encoded_im in summaries: 33 | assert encoding_type == 'GIF' 34 | file_name = '{}/{}_summary_{}.gif'.format(img_dir, metric_name, step) 35 | with open(os.path.join(self._save_dir, file_name), 'wb') as f: 36 | f.write(encoded_im) 37 | self._image_logs = {} 38 | 39 | def on_result(self, result): 40 | global_step = result['global_step'] 41 | 42 | report_step = False 43 | for k, v in result.items(): 44 | if 'metric/' not in k or 'step_time' in k: 45 | continue 46 | 47 | report_step = True 48 | tag = '_'.join(k.split('/')[1:]) 49 | if isinstance(v, np.ndarray): 50 | assert v.dtype == np.uint8 and len(v.shape) >= 4, 'assume np arrays are batched image data' 51 | self._image_logs[tag] = self._image_logs.get(tag, []) + [(global_step, 'GIF', encode_gif(construct_image_tile(v), 4))] 52 | else: 53 | self._metric_logs[tag] = self._metric_logs.get(tag, []) + [v] 54 | 55 | if report_step: 56 | self._metric_logs['global_step'] = self._metric_logs.get('global_step', []) + [global_step] 57 | 58 | self.flush() 59 | -------------------------------------------------------------------------------- /robonet/video_prediction/training/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def stbmajor(ten): 6 | """ 7 | swap time-batch major 8 | :param ten: npy tenosr 9 | :return: 10 | """ 11 | return np.transpose(ten, [1, 0] + list(range(2,len(ten.shape)))) 12 | 13 | 14 | def pad(real_frames, pad_amount): 15 | tensor = (real_frames * 255).astype(np.uint8) 16 | height_pad = np.zeros((tensor.shape[0], tensor.shape[1], pad_amount, tensor.shape[-2], tensor.shape[-1]), dtype=np.uint8) 17 | tensor = np.concatenate((height_pad, tensor, height_pad), axis=-3) 18 | width_pad = np.zeros((tensor.shape[0], tensor.shape[1], tensor.shape[2], pad_amount, tensor.shape[-1]), dtype=np.uint8) 19 | tensor = np.concatenate((width_pad, tensor, width_pad), axis=-2) 20 | return tensor 21 | 22 | def pad_and_concat(real_frames, pred_frames, pad_amount): 23 | real, pred = [(x * 255).astype(np.uint8) for x in (real_frames, pred_frames)] 24 | pred = np.concatenate([pred[:, 0][:, None] for _ in range(real.shape[1] - pred.shape[1])] + [pred], axis=1) 25 | image_summary_tensors = [] 26 | for tensor in [real, pred]: 27 | height_pad = np.zeros((tensor.shape[0], tensor.shape[1], pad_amount, tensor.shape[-2], tensor.shape[-1]), dtype=np.uint8) 28 | tensor = np.concatenate((height_pad, tensor, height_pad), axis=-3) 29 | width_pad = np.zeros((tensor.shape[0], tensor.shape[1], 
tensor.shape[2], pad_amount, tensor.shape[-1]), dtype=np.uint8) 30 | tensor = np.concatenate((width_pad, tensor, width_pad), axis=-2) 31 | image_summary_tensors.append(tensor) 32 | tensor = np.concatenate(image_summary_tensors, axis=2) 33 | return tensor 34 | 35 | 36 | def render_dist(dist): 37 | rendered = np.zeros((dist.shape[0], dist.shape[1], dist.shape[2], dist.shape[3], 3), dtype=np.float32) 38 | for b in range(dist.shape[0]): 39 | for t in range(dist.shape[1]): 40 | rendered[b,t] = np.squeeze(plt.cm.viridis(dist[b][t])[:, :, :3]) 41 | return rendered 42 | -------------------------------------------------------------------------------- /robonet/video_prediction/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SudeepDasari/RoboNet/d83eee20f39653c3f8e7c349df7350e8a9e9f7a7/robonet/video_prediction/utils/__init__.py -------------------------------------------------------------------------------- /robonet/video_prediction/utils/encode_img.py: -------------------------------------------------------------------------------- 1 | import imageio 2 | import io 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | def construct_image_tile(tensor): 8 | assert len(tensor.shape) == 4 or len(tensor.shape) == 5, "assumes (B, H, W, C) or (B, T, H, W, C) tensor" 9 | return np.concatenate([im for im in tensor], axis=-2) 10 | 11 | 12 | def encode_images(tensor, fps=4): 13 | if len(tensor.shape) == 3: 14 | return cv2.imencode('.jpg', tensor[:, :, ::-1])[1] 15 | elif len(tensor.shape) == 4: 16 | buffer = io.BytesIO() 17 | writer = imageio.get_writer(buffer, format='gif', fps=fps) 18 | [writer.append_data(im) for im in tensor] 19 | writer.close() 20 | buffer.seek(0) 21 | return buffer.read() 22 | raise NotImplementedError 23 | -------------------------------------------------------------------------------- /robonet/video_prediction/utils/ffmpeg_gif.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | 6 | def save_gif(gif_fname, images, fps): 7 | """ 8 | To generate a gif from image files, first generate palette from images 9 | and then generate the gif from the images and the palette. 10 | ffmpeg -i input_%02d.jpg -vf palettegen -y palette.png 11 | ffmpeg -i input_%02d.jpg -i palette.png -lavfi paletteuse -y output.gif 12 | 13 | Alternatively, use a filter to map the input images to both the palette 14 | and gif commands, while also passing the palette to the gif command. 15 | ffmpeg -i input_%02d.jpg -filter_complex "[0:v]split[x][z];[z]palettegen[y];[x][y]paletteuse" -y output.gif 16 | 17 | To directly pass in numpy images, use rawvideo format and `-i -` option. 
18 | """ 19 | from subprocess import Popen, PIPE 20 | head, tail = os.path.split(gif_fname) 21 | if head and not os.path.exists(head): 22 | os.makedirs(head) 23 | h, w, c = images[0].shape 24 | cmd = ['ffmpeg', '-y', 25 | '-f', 'rawvideo', 26 | '-vcodec', 'rawvideo', 27 | '-r', '%.02f' % fps, 28 | '-s', '%dx%d' % (w, h), 29 | '-pix_fmt', {1: 'gray', 3: 'rgb24', 4: 'rgba'}[c], 30 | '-i', '-', 31 | '-filter_complex', '[0:v]split[x][z];[z]palettegen[y];[x][y]paletteuse', 32 | '-r', '%.02f' % fps, 33 | '%s' % gif_fname] 34 | proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE) 35 | for image in images: 36 | proc.stdin.write(image.tostring()) 37 | out, err = proc.communicate() 38 | if proc.returncode: 39 | err = '\n'.join([' '.join(cmd), err.decode('utf8')]) 40 | raise IOError(err) 41 | del proc 42 | 43 | 44 | def encode_gif(images, fps): 45 | from subprocess import Popen, PIPE 46 | h, w, c = images[0].shape 47 | cmd = ['ffmpeg', '-y', 48 | '-f', 'rawvideo', 49 | '-vcodec', 'rawvideo', 50 | '-r', '%.02f' % fps, 51 | '-s', '%dx%d' % (w, h), 52 | '-pix_fmt', {1: 'gray', 3: 'rgb24', 4: 'rgba'}[c], 53 | '-i', '-', 54 | '-filter_complex', '[0:v]split[x][z];[z]palettegen[y];[x][y]paletteuse', 55 | '-r', '%.02f' % fps, 56 | '-f', 'gif', 57 | '-'] 58 | proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE) 59 | for image in images: 60 | proc.stdin.write(image.tostring()) 61 | out, err = proc.communicate() 62 | if proc.returncode: 63 | err = '\n'.join([' '.join(cmd), err.decode('utf8')]) 64 | raise IOError(err) 65 | del proc 66 | return out 67 | 68 | 69 | def main(): 70 | images_shape = (12, 64, 64, 3) # num_frames, height, width, channels 71 | images = np.random.randint(256, size=images_shape).astype(np.uint8) 72 | 73 | save_gif('output_save.gif', images, 4) 74 | with open('output_save.gif', 'rb') as f: 75 | string_save = f.read() 76 | 77 | string_encode = encode_gif(images, 4) 78 | with open('output_encode.gif', 'wb') as f: 79 | f.write(string_encode) 80 | 81 | print(np.all(string_save == string_encode)) 82 | 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /robonet/video_prediction/utils/html.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import dominate 4 | from dominate.tags import * 5 | 6 | 7 | class HTML: 8 | def __init__(self, web_dir, title, reflesh=0): 9 | self.title = title 10 | self.web_dir = web_dir 11 | self.img_dir = os.path.join(self.web_dir, 'images') 12 | if not os.path.exists(self.web_dir): 13 | os.makedirs(self.web_dir) 14 | if not os.path.exists(self.img_dir): 15 | os.makedirs(self.img_dir) 16 | # print(self.img_dir) 17 | 18 | self.doc = dominate.document(title=title) 19 | if reflesh > 0: 20 | with self.doc.head: 21 | meta(http_equiv="reflesh", content=str(reflesh)) 22 | self.t = None 23 | 24 | def get_image_dir(self): 25 | return self.img_dir 26 | 27 | def add_header1(self, str): 28 | with self.doc: 29 | h1(str) 30 | 31 | def add_header2(self, str): 32 | with self.doc: 33 | h2(str) 34 | 35 | def add_header3(self, str): 36 | with self.doc: 37 | h3(str) 38 | 39 | def add_table(self, border=1): 40 | self.t = table(border=border, style="table-layout: fixed;") 41 | self.doc.add(self.t) 42 | 43 | def add_row(self, txts, colspans=None): 44 | if self.t is None: 45 | self.add_table() 46 | with self.t: 47 | with tr(): 48 | if colspans: 49 | assert len(txts) == len(colspans) 50 | colspans = [dict(colspan=str(colspan)) for colspan in colspans] 51 
| else: 52 | colspans = [dict()] * len(txts) 53 | for txt, colspan in zip(txts, colspans): 54 | style = "word-break: break-all;" if len(str(txt)) > 80 else "word-wrap: break-word;" 55 | with td(style=style, halign="center", valign="top", **colspan): 56 | with p(): 57 | if txt is not None: 58 | p(txt) 59 | 60 | def add_images(self, ims, txts, links, colspans=None, height=None, width=400): 61 | image_style = '' 62 | if height is not None: 63 | image_style += "height:%dpx;" % height 64 | if width is not None: 65 | image_style += "width:%dpx;" % width 66 | if self.t is None: 67 | self.add_table() 68 | with self.t: 69 | with tr(): 70 | if colspans: 71 | assert len(txts) == len(colspans) 72 | colspans = [dict(colspan=str(colspan)) for colspan in colspans] 73 | else: 74 | colspans = [dict()] * len(txts) 75 | for im, txt, link, colspan in zip(ims, txts, links, colspans): 76 | with td(style="word-wrap: break-word;", halign="center", valign="top", **colspan): 77 | with p(): 78 | if im is not None and link is not None: 79 | with a(href=os.path.join('images', link)): 80 | img(style=image_style, src=os.path.join('images', im)) 81 | if im is not None and link is not None and txt is not None: 82 | br() 83 | if txt is not None: 84 | p(txt) 85 | 86 | def save(self): 87 | html_file = '%s/index.html' % self.web_dir 88 | f = open(html_file, 'wt') 89 | f.write(self.doc.render()) 90 | f.close() 91 | 92 | 93 | if __name__ == '__main__': 94 | html = HTML('web/', 'test_html') 95 | html.add_header('hello world') 96 | 97 | ims = [] 98 | txts = [] 99 | links = [] 100 | for n in range(4): 101 | ims.append('image_%d.jpg' % n) 102 | txts.append('text_%d' % n) 103 | links.append('image_%d.jpg' % n) 104 | html.add_images(ims, txts, links) 105 | html.save() 106 | -------------------------------------------------------------------------------- /robonet/yaml_util.py: -------------------------------------------------------------------------------- 1 | import re, yaml, os, json 2 | 3 | 4 | def parse_tune_config(config_file): 5 | """ 6 | Configures custom yaml loading behavior and parses config file 7 | """ 8 | import ray.tune as tune 9 | search_pattern = re.compile(r".*search\/(.*?)\((.*?)\)", re.VERBOSE) 10 | def search_constructor(loader, node): 11 | value = loader.construct_scalar(node) 12 | search_type, args = search_pattern.match(value).groups() 13 | if search_type == 'grid': 14 | return tune.grid_search(json.loads(args)) 15 | raise NotImplementedError("search {} is not implemented".format(search_type)) 16 | yaml.add_implicit_resolver("!custom_search", search_pattern, Loader=yaml.SafeLoader) 17 | yaml.add_constructor('!custom_search', search_constructor, Loader=yaml.SafeLoader) 18 | 19 | env_pattern = re.compile(r"\$\{(.*?)\}(.*)", re.VERBOSE) 20 | def env_var_constructor(loader, node): 21 | """ 22 | Converts ${VAR}/* from config file to 'os.environ[VAR] + *' 23 | Modified from: https://www.programcreek.com/python/example/61563/yaml.add_implicit_resolver 24 | """ 25 | value = loader.construct_scalar(node) 26 | env_var, remainder = env_pattern.match(value).groups() 27 | if env_var not in os.environ: 28 | raise ValueError("config requires envirnonment variable {} which is not set".format(env_var)) 29 | return os.environ[env_var] + remainder 30 | yaml.add_implicit_resolver("!env", env_pattern, Loader=yaml.SafeLoader) 31 | yaml.add_constructor('!env', env_var_constructor, Loader=yaml.SafeLoader) 32 | 33 | with open(config_file) as config: 34 | return yaml.load(config, Loader=yaml.SafeLoader) 35 | 36 | 37 | def 
parse_tpu_config(config_file): 38 | """ 39 | Configures custom yaml loading behavior and parses config file 40 | """ 41 | env_pattern = re.compile(r"\$\{(.*?)\}(.*)", re.VERBOSE) 42 | def env_var_constructor(loader, node): 43 | """ 44 | Converts ${VAR}/* from config file to 'os.environ[VAR] + *' 45 | Modified from: https://www.programcreek.com/python/example/61563/yaml.add_implicit_resolver 46 | """ 47 | value = loader.construct_scalar(node) 48 | env_var, remainder = env_pattern.match(value).groups() 49 | if env_var not in os.environ: 50 | raise ValueError("config requires envirnonment variable {} which is not set".format(env_var)) 51 | return os.environ[env_var] + remainder 52 | yaml.add_implicit_resolver("!env", env_pattern, Loader=yaml.SafeLoader) 53 | yaml.add_constructor('!env', env_var_constructor, Loader=yaml.SafeLoader) 54 | 55 | with open(config_file) as config: 56 | return yaml.load(config, Loader=yaml.SafeLoader) 57 | -------------------------------------------------------------------------------- /robonet_experiments/classifier_control/params.yaml: -------------------------------------------------------------------------------- 1 | # example configuration file for training a set of video prediction model on sawyer data from RoboNet 2 | # each model is trained on a different fraction of data 3 | 4 | # general experiment configurations 5 | batch_size: 16 6 | train_class: NumericHDF5Dataset 7 | max_steps: 300000 8 | result_dir: ${VMPC_EXP}/classifier_control/vidpred_training 9 | 10 | # list of dictionaries containing data sources along with filter parameters 11 | batch_config: 12 | # selects sawyer data with autograsp enabled (adim=4, robot=sawyer) 13 | - data_directory: ${VMPC_DATA}/classifier_control/data_collection/sim/1_obj_cartgripper_xz_rejsamp 14 | 15 | # loader_hparams used to initialize loader object 16 | loader_hparams: 17 | dataset: "NumericHDF5Dataset" 18 | buffer_size: 10 19 | load_T: 15 20 | random_shifts: True 21 | resize_image: True 22 | target_adim: 2 23 | target_sdim: 3 24 | 25 | # model_hparams used to create graph and loss function 26 | model_hparams: 27 | model: deterministic 28 | graph_type: c_dna_flow 29 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/capacity_test/base_model/flow.yaml: -------------------------------------------------------------------------------- 1 | # example configuration file for training a set of video prediction model on sawyer data from RoboNet 2 | # each model is trained on a different fraction of data 3 | 4 | # general experiment configurations 5 | batch_size: 16 6 | train_class: VPredTrainable 7 | max_steps: 300000 8 | train_fraction: search/grid([0.9, 0.1, 0.01, 0.001]) 9 | 10 | # list of dictionaries containing data sources along with filter parameters 11 | batch_config: 12 | # selects sawyer data with autograsp enabled (adim=4, robot=sawyer) 13 | - data_directory: ${DATA_DIR}/hdf5 14 | robot: "sawyer" 15 | adim: 4 16 | 17 | # loader_hparams used to initialize loader object 18 | loader_hparams: 19 | dataset: "RoboNet" 20 | buffer_size: 10 21 | color_augmentation: 0.1 22 | load_T: 15 23 | 24 | # model_hparams used to create graph and loss function 25 | model_hparams: 26 | model: deterministic 27 | graph_type: vgg_conv 28 | tv_weight: 0.0 29 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/capacity_test/base_model/noflow.yaml: -------------------------------------------------------------------------------- 1 | # 
example configuration file for training a set of video prediction model on sawyer data from RoboNet 2 | # each model is trained on a different fraction of data 3 | 4 | # general experiment configurations 5 | batch_size: 16 6 | train_class: VPredTrainable 7 | max_steps: 500000 8 | train_fraction: search/grid([0.9, 0.1, 0.01, 0.001]) 9 | 10 | # list of dictionaries containing data sources along with filter parameters 11 | batch_config: 12 | # selects sawyer data with autograsp enabled (adim=4, robot=sawyer) 13 | - data_directory: ${DATA_DIR}/hdf5 14 | robot: "sawyer" 15 | adim: 4 16 | 17 | # loader_hparams used to initialize loader object 18 | loader_hparams: 19 | dataset: "RoboNet" 20 | buffer_size: 10 21 | color_augmentation: 0.1 22 | load_T: 15 23 | 24 | # model_hparams used to create graph and loss function 25 | model_hparams: 26 | model: deterministic 27 | graph_type: vgg_conv 28 | use_flows: False 29 | tv_weight: 0 30 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/capacity_test/large_model/flow.yaml: -------------------------------------------------------------------------------- 1 | # example configuration file for training a set of video prediction model on sawyer data from RoboNet 2 | # each model is trained on a different fraction of data 3 | 4 | # general experiment configurations 5 | batch_size: 12 6 | train_class: VPredTrainable 7 | max_steps: 500000 8 | train_fraction: 0.9 9 | save_freq: 1000 10 | 11 | # list of dictionaries containing data sources along with filter parameters 12 | batch_config: 13 | # selects sawyer data with autograsp enabled (adim=4, robot=sawyer) 14 | - data_directory: ${DATA_DIR}/hdf5 15 | robot: search/grid(["sawyer", ["sawyer", "franka"], ["sawyer", "baxter"], ["sawyer", "baxter", "franka"]]) 16 | adim: 4 17 | 18 | # loader_hparams used to initialize loader object 19 | loader_hparams: 20 | dataset: "RoboNet" 21 | buffer_size: 10 22 | color_augmentation: 0.1 23 | load_T: 15 24 | 25 | # model_hparams used to create graph and loss function 26 | model_hparams: 27 | model: deterministic 28 | graph_type: vgg_conv 29 | lr: 0.0001 30 | context_frames: 5 31 | schedule_sampling_k: 4000 32 | use_flows: True 33 | tv_weight: 0 34 | enc_filters: [256, 512, 1024] 35 | lstm_filters: 1024 36 | dec_filters: [1024, 512] 37 | img_flows: 32 38 | skip_flows: 16 39 | 40 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/capacity_test/large_model/noflow.yaml: -------------------------------------------------------------------------------- 1 | # example configuration file for training a set of video prediction model on sawyer data from RoboNet 2 | # each model is trained on a different fraction of data 3 | 4 | # general experiment configurations 5 | batch_size: 16 6 | train_class: VPredTrainable 7 | max_steps: 500000 8 | train_fraction: search/grid([0.9, 0.1, 0.01, 0.001]) 9 | 10 | # list of dictionaries containing data sources along with filter parameters 11 | batch_config: 12 | # selects sawyer data with autograsp enabled (adim=4, robot=sawyer) 13 | - data_directory: ${DATA_DIR}/hdf5 14 | robot: "sawyer" 15 | adim: 4 16 | 17 | # loader_hparams used to initialize loader object 18 | loader_hparams: 19 | dataset: "RoboNet" 20 | buffer_size: 10 21 | color_augmentation: 0.1 22 | load_T: 15 23 | 24 | # model_hparams used to create graph and loss function 25 | model_hparams: 26 | model: deterministic 27 | graph_type: vgg_conv 28 | lr: 0.0001 29 | context_frames: 5 30 
| schedule_sampling_k: 4000 31 | use_flows: False 32 | tv_weight: 0 33 | enc_filters: [256, 512, 1024] 34 | lstm_filters: 1024 35 | dec_filters: [1024, 512] 36 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/finetune_baxter.yaml: -------------------------------------------------------------------------------- 1 | # example configuration file for fine-tuning a pretrained video prediction model on newly collected baxter data 2 | # the pretrained checkpoint is restored from ${RESTORE_DIR} 3 | 4 | # general experiment configurations 5 | batch_size: 16 6 | train_class: VPredTrainable 7 | restore_dir: ${RESTORE_DIR} 8 | max_steps: 300000 9 | 10 | # list of dictionaries containing data sources along with filter parameters 11 | batch_config: 12 | # selects newly collected baxter data for finetuning 13 | # note there is a 50% chance to select the new cloth data and a 50% chance to sample from one of the pretraining sources 14 | - data_directory: ${DATA_DIR}/baxter_cloth 15 | source_prob: 0.5 16 | 17 | # selects non-baxter data with autograsp enabled 18 | # source_prob need not be set here - code will automatically set it to 1 - 0.5 (from above) 19 | - data_directory: ${DATA_DIR}/hdf5 20 | robot: ["sawyer", "widowx", "franka"] 21 | adim: 4 22 | 23 | # loader_hparams used to initialize loader object 24 | loader_hparams: 25 | dataset: "RoboNet" 26 | buffer_size: 10 27 | load_T: 15 28 | color_augmentation: 0.1 29 | 30 | # model_hparams used to create graph and loss function 31 | model_hparams: 32 | model: deterministic 33 | graph_type: c_dna_flow 34 | tv_weight: 0.001 35 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/pretrain_models/all_robonet/large.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 12 2 | train_class: VPredTrainable 3 | max_steps: 500000 4 | train_fraction: 0.9 5 | save_freq: 5000 6 | 7 | batch_config: 8 | - data_directory: ${DATA_DIR}/hdf5 9 | 10 | # loader_hparams used to initialize loader object 11 | loader_hparams: 12 | dataset: "RoboNet" 13 | buffer_size: 10 14 | color_augmentation: 0.1 15 | load_T: 15 16 | target_adim: 4 17 | action_mismatch: 3 18 | state_mismatch: 3 19 | 20 | # model_hparams used to create graph and loss function 21 | model_hparams: 22 | model: deterministic 23 | graph_type: vgg_conv 24 | lr: 0.0001 25 | context_frames: 5 26 | schedule_sampling_k: 4000 27 | use_flows: True 28 | tv_weight: 0 29 | enc_filters: [256, 512, 896] 30 | lstm_filters: 896 31 | dec_filters: [896, 512] 32 | img_flows: 32 33 | skip_flows: 16 34 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/pretrain_models/all_robonet/medium.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 8 2 | train_class: VPredTrainable 3 | max_steps: 500000 4 | train_fraction: 0.9 5 | save_freq: 5000 6 | 7 | batch_config: 8 | - data_directory: ${DATA_DIR}/hdf5 9 | 10 | # loader_hparams used to initialize loader object 11 | loader_hparams: 12 | dataset: "RoboNet" 13 | buffer_size: 10 14 | color_augmentation: 0.1 15 | load_T: 15 16 | target_adim: 4 17 | action_mismatch: 3 18 | state_mismatch: 3 19 | 20 | # model_hparams used to create graph and loss function 21 | model_hparams: 22 | model: deterministic 23 | graph_type: vgg_conv 24 | lr: 0.0001 25 | context_frames: 5 26 | schedule_sampling_k: 4000 27 | use_flows: True 28 | tv_weight: 0 29 |
enc_filters: [256, 512, 512] 30 | lstm_filters: 512 31 | dec_filters: [512, 512] 32 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/pretrain_models/autograsp/large.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 12 2 | train_class: VPredTrainable 3 | max_steps: 500000 4 | train_fraction: 0.9 5 | save_freq: 5000 6 | 7 | batch_config: 8 | - data_directory: ${DATA_DIR}/hdf5 9 | primitives: autograsp 10 | adim: 4 11 | robot: search/grid([["sawyer", "widowx", "baxter", "kuka"], ["sawyer", "widowx", "franka", "kuka"], ["sawyer", "widowx", "baxter", "franka"]]) 12 | 13 | # loader_hparams used to initialize loader object 14 | loader_hparams: 15 | dataset: "RoboNet" 16 | buffer_size: 10 17 | color_augmentation: 0.1 18 | load_T: 15 19 | 20 | # model_hparams used to create graph and loss function 21 | model_hparams: 22 | model: deterministic 23 | graph_type: vgg_conv 24 | lr: 0.0001 25 | context_frames: 5 26 | schedule_sampling_k: 4000 27 | use_flows: True 28 | tv_weight: 0 29 | enc_filters: [256, 512, 896] 30 | lstm_filters: 896 31 | dec_filters: [896, 512] 32 | img_flows: 32 33 | skip_flows: 16 34 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/pretrain_models/autograsp/medium.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 8 2 | train_class: VPredTrainable 3 | max_steps: 500000 4 | train_fraction: 0.9 5 | save_freq: 5000 6 | 7 | batch_config: 8 | - data_directory: ${DATA_DIR}/hdf5 9 | primitives: autograsp 10 | adim: 4 11 | robot: search/grid([["sawyer", "widowx", "baxter", "kuka"], ["sawyer", "widowx", "franka", "kuka"], ["sawyer", "widowx", "baxter", "franka"]]) 12 | 13 | # loader_hparams used to initialize loader object 14 | loader_hparams: 15 | dataset: "RoboNet" 16 | buffer_size: 10 17 | color_augmentation: 0.1 18 | load_T: 15 19 | 20 | # model_hparams used to create graph and loss function 21 | model_hparams: 22 | model: deterministic 23 | graph_type: vgg_conv 24 | lr: 0.0001 25 | context_frames: 5 26 | schedule_sampling_k: 4000 27 | use_flows: True 28 | tv_weight: 0 29 | enc_filters: [256, 512, 512] 30 | lstm_filters: 512 31 | dec_filters: [512, 512] 32 | -------------------------------------------------------------------------------- /robonet_experiments/gpu/sawyer_grid_search.yaml: -------------------------------------------------------------------------------- 1 | # example configuration file for training a set of video prediction model on sawyer data from RoboNet 2 | # each model is trained on a different fraction of data 3 | 4 | # general experiment configurations 5 | batch_size: 16 6 | train_class: VPredTrainable 7 | max_steps: 300000 8 | train_fraction: search/grid([0.9, 0.1, 0.01, 0.001]) 9 | 10 | # list of dictionaries containing data sources along with filter parameters 11 | batch_config: 12 | # selects sawyer data with autograsp enabled (adim=4, robot=sawyer) 13 | - data_directory: ${DATA_DIR}/hdf5 14 | robot: "sawyer" 15 | adim: 4 16 | 17 | # loader_hparams used to initialize loader object 18 | loader_hparams: 19 | dataset: "RoboNet" 20 | buffer_size: 10 21 | load_T: 15 22 | color_augmentation: 0.1 23 | 24 | # model_hparams used to create graph and loss function 25 | model_hparams: 26 | model: deterministic 27 | graph_type: vgg_conv 28 | tv_weight: 0.0 -------------------------------------------------------------------------------- 
/robonet_experiments/inverse_model/discretized_inverse.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | train_class: InverseTrainable 3 | max_steps: 200000 4 | 5 | 6 | # list of dictionaries containing data sources along with filter parameters 7 | batch_config: 8 | # selects sawyer data with autograsp enabled (adim=4, robot=sawyer) 9 | - data_directory: ${DATA_DIR}/hdf5 10 | robot: "sawyer" 11 | adim: 4 12 | 13 | # loader_hparams used to initialize loader object 14 | loader_hparams: 15 | dataset: "RoboNet" 16 | buffer_size: 10 17 | load_T: 4 #search/grid([2, 4, 11]) 18 | # color_augmentation: 0.1 19 | img_size: [192, 256] 20 | 21 | # model_hparams used to create graph and loss function 22 | model_hparams: 23 | model: DiscretizedInverseModel 24 | graph_type: lstm_baseline 25 | vgg_path: ${VGG_DIR} 26 | context_actions: 2 27 | enc_dim: 256 28 | latent_dim: 64 29 | append_last_action: true 30 | -------------------------------------------------------------------------------- /robonet_experiments/inverse_model/inverse.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | train_class: InverseTrainable 3 | max_steps: 100000 4 | 5 | # list of dictionaries containing data sources along with filter parameters 6 | batch_config: 7 | # selects sawyer data with autograsp enabled (adim=4, robot=sawyer) 8 | - data_directory: ${DATA_DIR}/hdf5 9 | robot: "sawyer" 10 | adim: 4 11 | 12 | # loader_hparams used to initialize loader object 13 | loader_hparams: 14 | dataset: "RoboNet" 15 | buffer_size: 10 16 | load_T: search/grid([2, 3, 4]) 17 | load_random_cam: False 18 | # color_augmentation: 0.1 19 | img_size: [192, 256] 20 | 21 | # model_hparams used to create graph and loss function 22 | model_hparams: 23 | model: DeterministicInverseModel 24 | graph_type: lstm_baseline 25 | vgg_path: ${VGG_DIR} 26 | -------------------------------------------------------------------------------- /robonet_experiments/tpu/capacity_test_flow.yaml: -------------------------------------------------------------------------------- 1 | # general experiment and batch configs 2 | data_directory: ${DATA_DIR} 3 | save_dir: "model_save_large_lbls_flow_multibot_2" 4 | batch_sizes: [4, 4, 4, 4] 5 | robots: ['sawyer', 'franka', 'baxter', 'widowx'] 6 | max_steps: 600000 7 | # robots: ["sawyer"] 8 | 9 | # loader_hparams used to initialize loader object 10 | loader_hparams: 11 | load_T: 15 12 | 13 | # model_hparams used to create graph and loss function 14 | model_hparams: 15 | model: deterministic 16 | graph_type: vgg_conv 17 | tv_weight: 0 18 | lr: 0.0001 19 | context_frames: 5 20 | enc_filters: [512, 1024, 1536] 21 | lstm_filters: 1536 22 | dec_filters: [1024, 512] 23 | schedule_sampling_k: 4000 24 | img_flows: 32 25 | skip_flows: 16 26 | -------------------------------------------------------------------------------- /robonet_experiments/tpu/capacity_test_noflow.yaml: -------------------------------------------------------------------------------- 1 | # general experiment and batch configs 2 | data_directory: ${DATA_DIR} 3 | save_dir: "model_save_large_lbls" 4 | batch_sizes: [16] 5 | max_steps: 600000 6 | 7 | # loader_hparams used to initialize loader object 8 | loader_hparams: 9 | load_T: 15 10 | 11 | # model_hparams used to create graph and loss function 12 | model_hparams: 13 | model: deterministic 14 | graph_type: vgg_conv 15 | use_flows: False 16 | tv_weight: 0 17 | lr: 0.0001 18 | context_frames: 5 19 | enc_filters:
[512, 1024, 1792] 20 | lstm_filters: 1792 21 | dec_filters: [1024, 512] 22 | schedule_sampling_k: 4000 23 | -------------------------------------------------------------------------------- /scripts/examples/create_prediction_gifs.py: -------------------------------------------------------------------------------- 1 | from robonet.video_prediction.testing import VPredEvaluation 2 | from robonet.yaml_util import parse_tune_config as parse_config 3 | import os 4 | import argparse 5 | import tensorflow as tf 6 | from robonet.datasets import get_dataset_class, load_metadata 7 | from tensorflow.contrib.training import HParams 8 | from robonet.datasets.util.tensor_multiplexer import MultiplexedTensors 9 | import numpy as np 10 | import imageio 11 | 12 | 13 | class DataLoader: 14 | def __init__(self, config): 15 | # run hparams are passed in through config dict 16 | self.dataset_hparams, self.model_hparams, self._hparams = self._extract_hparams(config) 17 | self._inputs, self._targets = self._make_dataloaders(config) 18 | 19 | def _default_hparams(self): 20 | default_dict = { 21 | 'batch_size': 16, 22 | 'restore_dir': '', 23 | 'n_gpus': 1, 24 | 'pad_amount': 2, 25 | 'scalar_summary_freq': 100, 26 | 'image_summary_freq': 1000, 27 | 'train_fraction': 0.9, 28 | 'val_fraction': 0.05, 29 | 'max_to_keep': 3, 30 | 'max_steps': 300000, 31 | } 32 | return HParams(**default_dict) 33 | 34 | def _get_dataset_class(self, class_name): 35 | return get_dataset_class(class_name) 36 | 37 | def _extract_hparams(self, config): 38 | """ 39 | Grabs and (optionally) modifies hparams 40 | """ 41 | self._batch_config = config.pop('batch_config') 42 | dataset_hparams, model_hparams = config.pop('loader_hparams', {}), config.pop('model_hparams', {}) 43 | hparams = self._default_hparams().override_from_dict(config) 44 | 45 | if 'splits' not in dataset_hparams: 46 | dataset_hparams['splits'] = [hparams.train_fraction, hparams.val_fraction, 1 - hparams.val_fraction - hparams.train_fraction] 47 | assert all([x >= 0 for x in dataset_hparams['splits']]), "invalid train/val fractions!" 48 | 49 | if 'sequence_length' in model_hparams and 'load_T' not in dataset_hparams: 50 | dataset_hparams['load_T'] = model_hparams['sequence_length'] 51 | 52 | return dataset_hparams, model_hparams, hparams 53 | 54 | def _get_input_targets(self, DatasetClass, metadata, dataset_hparams): 55 | data_loader = DatasetClass(self._hparams.batch_size, metadata, dataset_hparams) 56 | 57 | tensor_names = ['actions', 'images', 'states'] 58 | if 'annotations' in data_loader: 59 | tensor_names = ['actions', 'images', 'states', 'annotations'] 60 | 61 | self._tensor_multiplexer = MultiplexedTensors(data_loader, tensor_names) 62 | loaded_tensors = [self._tensor_multiplexer[k] for k in tensor_names] 63 | 64 | self._real_annotations = None 65 | assert loaded_tensors[1].get_shape().as_list()[2] == 1, "loader assumes one (potentially random) camera will be loaded in each example!" 
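# the singleton camera axis checked above is squeezed out below, so image/annotation tensors are (batch, time, height, width, channels) from here on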
66 | self._real_images = loaded_tensors[1] = loaded_tensors[1][:, :, 0] # grab cam 0 for images 67 | if 'annotations' in data_loader: 68 | self._real_annotations = loaded_tensors[3] = loaded_tensors[3][:, :, 0] # grab cam 0 for annotations 69 | 70 | inputs, targets = {'actions': loaded_tensors[0]}, {} 71 | for k, v in zip(tensor_names[1:], loaded_tensors[1:]): 72 | inputs[k], targets[k] = v[:, :-1], v 73 | 74 | self._data_loader = data_loader 75 | return inputs, targets 76 | 77 | def _make_dataloaders(self, config): 78 | DatasetClass = self._get_dataset_class(self.dataset_hparams.pop('dataset')) 79 | sources, self.dataset_hparams['source_selection_probabilities'] = self._init_sources() 80 | 81 | inputs, targets = self._get_input_targets(DatasetClass, sources, self.dataset_hparams) 82 | return inputs, targets 83 | 84 | def _default_source_hparams(self): 85 | return { 86 | 'data_directory': './', 87 | 'source_prob': None, 88 | 'balance_by_attribute': ['robot'] # split data source into multiple sources where for each source meta[attr] == a, (e.g all examples in one source come from a specific robot) 89 | } 90 | 91 | def _init_sources(self): 92 | loaded_metadata = {} 93 | sources, source_probs = [], [] 94 | 95 | for source in self._batch_config: 96 | source_hparams = self._default_source_hparams() 97 | source_hparams.update(source) 98 | dir_path = os.path.realpath(os.path.expanduser(source_hparams['data_directory'])) 99 | meta_data = loaded_metadata[dir_path] = loaded_metadata.get(dir_path, load_metadata(dir_path)) 100 | 101 | for k, v in source_hparams.items(): 102 | if k not in self._default_source_hparams(): 103 | if k == 'object_classes': 104 | meta_data = meta_data.select_objects(v) 105 | elif isinstance(v, (list, tuple)): 106 | meta_data = meta_data[meta_data[k].frame.isin(v)] 107 | else: 108 | meta_data = meta_data[meta_data[k] == v] 109 | assert len(meta_data), "filters created empty data source!" 110 | 111 | if source_hparams['balance_by_attribute']: 112 | meta_data = [meta_data] 113 | for k in source_hparams['balance_by_attribute']: 114 | new_data = [] 115 | for m in meta_data: 116 | unique_elems = m[k].frame.unique().tolist() 117 | new_data.extend([m[m[k] == u] for u in unique_elems]) 118 | meta_data = new_data 119 | 120 | if source_hparams['source_prob']: 121 | new_prob = source_hparams['source_prob'] / float(len(meta_data)) 122 | source_hparams['source_prob'] = [new_prob for _ in range(len(meta_data))] 123 | else: 124 | source_hparams['source_prob'] = [None for _ in range(len(meta_data))] 125 | 126 | sources.extend(meta_data) 127 | source_probs.extend(source_hparams['source_prob']) 128 | else: 129 | source_probs.append(source_hparams['source_prob']) 130 | sources.append(meta_data) 131 | 132 | if any([s is not None for s in source_probs]): 133 | set_probs = [s for s in source_probs if s is not None] 134 | assert all([0 <= s <= 1 for s in set_probs]) and sum(set_probs) <= 1, "invalid probability distribution!" 
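# sources that did not set an explicit source_prob evenly share whatever probability mass remains (computed below)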
135 | if len(set_probs) != len(source_probs): 136 | remainder_prob = (1.0 - sum(set_probs)) / (len(source_probs) - len(set_probs)) 137 | for i in range(len(source_probs)): 138 | if source_probs[i] is None: 139 | source_probs[i] = remainder_prob 140 | else: 141 | source_probs = None 142 | 143 | return sources, source_probs 144 | 145 | def get_batch(self, sess, mode='test'): 146 | return sess.run([self._inputs, self._targets], feed_dict=self._tensor_multiplexer.get_feed_dict(mode)) 147 | 148 | 149 | def get_prediction_batches(dataset, prediction_model, mode='test'): 150 | batch = dataset.get_batch(prediction_model._sess, mode) 151 | actions = batch[0]['actions'] 152 | states, images = [batch[1][x] for x in ('states', 'images')] 153 | context = { 154 | "context_frames": images[:, :prediction_model.n_context][:, :, None], 155 | "context_actions": actions[:, :prediction_model.n_context - 1], 156 | "context_states": states[:, :prediction_model.n_context] 157 | } 158 | real_actions = actions[:, prediction_model.n_context - 1:] 159 | 160 | real_prediction_batch = {'context_tensors': context, 'action_tensors': {'actions':real_actions}} 161 | real_frames = images[:, prediction_model.n_context:] 162 | return real_prediction_batch, real_frames 163 | 164 | 165 | if __name__ == '__main__': 166 | import pickle as pkl 167 | parser = argparse.ArgumentParser() 168 | parser.add_argument('experiment_file', type=str, help='path to YAML experiment config file') 169 | parser.add_argument('prediction_checkpoint', type=str, help="path to video prediction model checkpoint folder") 170 | parser.add_argument('--N', type=int, help="number of batches to run", default=1) 171 | parser.add_argument('--n_gpus', type=int, help="number of GPUs to use during eval", default=1) 172 | args = parser.parse_args() 173 | args.experiment_file = os.path.expanduser(args.experiment_file) 174 | args.prediction_checkpoint = os.path.expanduser(args.prediction_checkpoint) 175 | 176 | config = parse_config(args.experiment_file) 177 | config.pop('train_class', None) 178 | 179 | batch_size = config['batch_size'] 180 | prediction_model = VPredEvaluation(args.prediction_checkpoint, {"run_batch_size": batch_size, 'tile_context': False}, n_gpus=args.n_gpus) 181 | config['loader_hparams']['load_T'] = prediction_model.sequence_length 182 | dataset = DataLoader(config) 183 | prediction_model.restore() 184 | 185 | l1_errors = [] 186 | for n in range(args.N): 187 | input_batch, real_frames = get_prediction_batches(dataset, prediction_model) 188 | pred_frames = prediction_model(**input_batch)['predicted_frames'][:, :, 0] 189 | n_pixels = pred_frames.shape[0] * pred_frames.shape[1] * pred_frames.shape[2] * pred_frames.shape[3] 190 | l1_errors.append(np.sum(np.abs(pred_frames - real_frames)) / n_pixels) 191 | 192 | for b in range(batch_size): 193 | for vid, name in zip([real_frames, pred_frames], ['real', 'pred']): 194 | images = (vid[b] * 255).astype(np.uint8) 195 | writer = imageio.get_writer('b{}_{}.gif'.format(n * batch_size + b, name)) 196 | for t in range(images.shape[0]): 197 | writer.append_data(images[t]) 198 | writer.close() 199 | print('average l1 error', np.mean(l1_errors)) 200 | print('std l1 error', np.std(l1_errors)) 201 | -------------------------------------------------------------------------------- /scripts/examples/test_franka_flow.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple script that shows the video-predictor API in action 3 | """ 4 | 5 | 6 | from 
robonet.video_prediction.testing.model_evaluation_interface import VPredEvaluation 7 | import numpy as np 8 | 9 | test_hparams = {} 10 | test_hparams['designated_pixel_count'] = 1 # number of selected pixels 11 | test_hparams['run_batch_size'] = 200 # number of predictions run through model concurrently 12 | N_ACTIONS = 300 # total actions to predict: can be different from run_batch_size! 13 | 14 | # feed in restore path and test-specific hyperparams 15 | model = VPredEvaluation('~/Downloads/franka_sanity/sanity_check_model/checkpoint_170000', test_hparams) 16 | model.restore() 17 | 18 | # context tensors needed for prediction 19 | context_tensors = {} 20 | context_tensors['context_actions'] = np.zeros((model.n_context - 1, model.adim)) 21 | context_tensors['context_states'] = np.zeros((model.n_context, model.sdim)) # not needed for all models 22 | height, width = model.img_size 23 | context_tensors['context_frames'] = np.zeros((model.n_context, model.n_cam, height, width, 3)) # inputs should be RGB float \in [0, 1] 24 | context_tensors['context_pixel_distributions'] = np.zeros((model.n_context, model.n_cam, height, # spatial distributions (sum across image should be 1) 25 | width, test_hparams['designated_pixel_count'])) 26 | context_tensors['context_pixel_distributions'][:, :, 24, 32, :] = 1.0 27 | 28 | # actions for frames to be predicted 29 | action_tensors = {} 30 | action_tensors['actions'] = np.zeros((N_ACTIONS, model.horizon, model.adim)) 31 | 32 | results = model(context_tensors, action_tensors) 33 | predicted_frames = results['predicted_frames'] # RGB images, shape (N_ACTIONS, HORIZON, N_CAMS, 48, 64, 3) 34 | predicted_distributions = results['predicted_pixel_distributions'] # pixel distributions, shape (N_ACTIONS, HORIZON, N_CAMS, 48, 64, designated_pixel_count) 35 | print('predicted_frames has shape', predicted_frames.shape) 36 | -------------------------------------------------------------------------------- /scripts/templates/index_template.html: -------------------------------------------------------------------------------- 1 | 2 | 12 | Trajectory Viewer 13 | 14 | 15 |

Dataset Visualizer 16 | <table> 17 | <tr> 18 | {% for name in filter_names %} 19 | <th>{{ name }}</th> 20 | {% endfor %} 21 | </tr> 22 | <tr> 23 | {% for f in filters %} 24 | <td> 25 | {% for traj in f %} 26 | <a href="{{ traj.url }}">{{ traj.text }}</a> 27 | <br> 28 | {% endfor %} 29 | </td> 30 | {% endfor %} 31 | </tr> 32 | </table> 33 | 34 | -------------------------------------------------------------------------------- /scripts/templates/traj_template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 12 | Trajectory Viewer 13 | 14 | 15 | 16 |

RoboNet Trajectory {{ traj_name }} 17 | 18 | Meta-Data 19 | <table> 20 | {% for attr in attributes %} 21 | <tr> 22 | <td>{{ attr.name }}</td> 23 | <td>{{ attr.value }}</td> 24 | </tr> 25 | {% endfor %} 26 | </table> 27 | 28 | Videos 29 | 30 | <table> 31 | <tr> 32 | {% for name in video_names %} 33 | <th>{{ name }}</th> 34 | {% endfor %} 35 | </tr> 36 | <tr> 37 | 38 | {% for video in videos %} 39 | <td> 40 | <video controls> 41 | <source src="{{ video.url }}" type="{{ video.type }}"> 42 | </video> 43 | </td> 44 | {% endfor %} 45 | </tr> 46 | </table>
47 | 48 | 49 | -------------------------------------------------------------------------------- /scripts/train_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from robonet import get_trainable, GIFLogger 3 | import tensorflow as tf 4 | import ray 5 | import ray.tune as tune 6 | from robonet.yaml_util import parse_tune_config as parse_config 7 | import os 8 | 9 | 10 | def trial_str_creator(trial): 11 | return "{}_{}".format(str(trial), trial.trial_id) 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('experiment_file', type=str, help='path to YAML experiment config file') 17 | parser.add_argument('--local_mode', action='store_true', help="if flag enables local_mode") 18 | parser.add_argument('--cluster', action='store_true', help="if flag enables cluster mode") 19 | parser.add_argument('--resume', action='store_true', help="if flag provided resume from checkpoints rather than start from scratch") 20 | parser.add_argument('--temp_dir', type=str, default=None, help="sets temp dir for ray redis (useful if permission error in /tmp/)") 21 | parser.add_argument('--name', type=str, default=None, help="sets experiment name") 22 | parser.add_argument('--n_gpus', type=int, default=1, help="number of GPUs to train on") 23 | args = parser.parse_args() 24 | config = parse_config(args.experiment_file) 25 | config['n_gpus'] = args.n_gpus 26 | 27 | redis_address, max_failures, local_mode = None, 10, False 28 | resume = config.pop('resume', args.resume) 29 | if args.cluster or config.pop('cluster', False): 30 | redis_address = ray.services.get_node_ip_address() + ':6379' 31 | max_failures = 1000 32 | elif args.local_mode or config.pop('local_mode', False): 33 | resume=False 34 | local_mode = True 35 | max_failures = 0 36 | 37 | if args.temp_dir is None: 38 | args.temp_dir = config.pop('temp_dir', None) 39 | 40 | if args.name is not None: 41 | name = args.name 42 | config.pop('name', None) 43 | else: 44 | name = config.pop('name', "{}_training".format(os.getlogin())) 45 | 46 | exp = tune.Experiment( 47 | name=name, 48 | run=get_trainable(config.pop('train_class')), 49 | trial_name_creator=tune.function(trial_str_creator), 50 | loggers=[GIFLogger], 51 | resources_per_trial= {"cpu": 1, "gpu": args.n_gpus}, 52 | checkpoint_freq=config.pop('save_freq', 5000), 53 | upload_dir=config.pop('upload_dir', None), 54 | local_dir=config.pop('local_dir', None), 55 | config=config # evaluate last to allow all popping above 56 | ) 57 | 58 | ray.init(redis_address=redis_address, local_mode=local_mode, temp_dir=args.temp_dir) 59 | trials = tune.run(exp, queue_trials=True, resume=resume, 60 | checkpoint_at_end=True, max_failures=max_failures) 61 | exit(0) 62 | -------------------------------------------------------------------------------- /scripts/train_vpred_tpu.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import os 3 | from robonet.datasets import get_dataset_class 4 | from robonet.video_prediction.models import get_model 5 | import tensorflow as tf 6 | from robonet.yaml_util import parse_tpu_config as parse_config 7 | 8 | 9 | def dataset_fn(params, DatasetClass, batch_sizes, loader_files, dataset_hparams): 10 | loader = DatasetClass(batch_sizes, loader_files, dataset_hparams) 11 | inputs = {} 12 | targets = {} 13 | 14 | inputs['actions'] = loader['actions'] 15 | inputs['images'] = loader['images'][:, :-1] 16 | inputs['states'] = 
loader['states'][:, :-1] 17 | 18 | targets['images'] = loader['images'] 19 | targets['states'] = loader['states'] 20 | 21 | return inputs, targets 22 | 23 | 24 | if __name__ == '__main__': 25 | import argparse 26 | parser = argparse.ArgumentParser(description='launches video prediction training on tpu instances') 27 | parser.add_argument('experiment_file', type=str, default='', help='path of experiment file') 28 | parser.add_argument('--testing', action='store_true', help='if flag is supplied then assume testing mode (model run on cpu)') 29 | args = parser.parse_args() 30 | 31 | config = parse_config(args.experiment_file) 32 | dataset_hparams = config.pop('loader_hparams') 33 | model_hparams = config.pop('model_hparams') 34 | 35 | # add bucket_dir to hparams 36 | if 'BUCKET' in os.environ and 'bucket_dir' not in dataset_hparams: 37 | dataset_hparams['bucket_dir'] = os.environ['BUCKET'] 38 | config['save_dir'] = '{}/{}'.format(os.environ['BUCKET'], config['save_dir']) 39 | 40 | # extract train params from config 41 | input_dir = os.path.expanduser(config['data_directory']) 42 | batch_sizes = config['batch_sizes'] 43 | model_hparams['summary_dir'] = save_dir = os.path.expanduser(config['save_dir']) 44 | train_steps_per_save = config.get('train_steps_per_save', 5000) 45 | model_hparams['summary_queue_len'] = iter_per_loop = config.get('iter_per_loop', train_steps_per_save) 46 | model_hparams['image_summary_freq'] = config.get('image_summary_freq', 500) 47 | 48 | robots = config.get('robots', ['sawyer']) 49 | max_steps = config.get('max_steps', 300000) 50 | 51 | loader_files = ['{}/{}'.format(input_dir, r) for r in robots] 52 | DatasetClass = get_dataset_class(dataset_hparams.pop('dataset', 'TPU')) 53 | 54 | train_input = functools.partial(dataset_fn, DatasetClass=DatasetClass, batch_sizes=batch_sizes, 55 | loader_files=loader_files, dataset_hparams=dataset_hparams) 56 | 57 | PredictionModel = get_model(model_hparams.pop('model')) 58 | model = PredictionModel(None, 0, model_hparams.pop('graph_type'), True, '') 59 | 60 | tpu_cluster_resolver=None 61 | if not args.testing: 62 | tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(os.environ['TPU_NAME'], zone=os.environ['TPU_ZONE'], project=os.environ['PROJECT_ID']) 63 | 64 | tpu_config = tf.contrib.tpu.TPUConfig(iterations_per_loop=iter_per_loop) 65 | run_config = tf.contrib.tpu.RunConfig(cluster=tpu_cluster_resolver, model_dir=save_dir, save_checkpoints_steps=train_steps_per_save,tpu_config=tpu_config) 66 | 67 | tf.logging.set_verbosity(tf.logging.DEBUG) 68 | estimator = tf.contrib.tpu.TPUEstimator(model_fn=model.model_fn, 69 | use_tpu=not args.testing, 70 | train_batch_size=sum(batch_sizes), 71 | eval_batch_size=sum(batch_sizes), 72 | predict_batch_size=sum(batch_sizes), 73 | params=model_hparams, 74 | config=run_config) 75 | 76 | estimator.train(input_fn=train_input, max_steps=max_steps) 77 | -------------------------------------------------------------------------------- /scripts/visualize_dataset.py: -------------------------------------------------------------------------------- 1 | import io 2 | import imageio 3 | from flask import Flask, render_template, url_for, redirect, abort, send_file 4 | import argparse 5 | app = Flask(__name__) 6 | 7 | 8 | args=None 9 | if __name__ == '__main__': 10 | parser = argparse.ArgumentParser(description="Web based utility to visualize RoboNet trajectoriers (in hdf5 format). I don't even pretend like this is secure. 
Don't serve on a public website!") 11 | parser.add_argument('input_dir', type=str, help='path to stored hdf5 files') 12 | args = parser.parse_args() 13 | f = open('scripts/test.mp4', 'rb') 14 | vid = f.read() 15 | f.close() 16 | 17 | 18 | @app.route('/') 19 | def index(): 20 | filter_names = ['filt0', 'filt1'] 21 | 22 | traj0 = {'url': '/traj/0', 'text': 'here lies traj 0'} 23 | traj10 = {'url': '/traj/10', 'text': 'here lies traj 10'} 24 | filters = [[traj0], [traj0, traj10]] 25 | 26 | return render_template('index_template.html', filter_names=filter_names, filters=filters) 27 | 28 | 29 | @app.route('/traj/') 30 | def traj_page(traj_id): 31 | if traj_id != 0 and traj_id != 10: # page not found if traj id not valid 32 | abort(404) 33 | 34 | attr_list = [{'name': 'robot', 'value': 'sawyer'}, {'name': 'te', 'value': 'st'}] 35 | 36 | vid_url = '/traj/{}/cam{}.mp4'.format(traj_id, 0) 37 | name_list = ['cam0', 'cam1', 'cam2'] 38 | video_list = [{'url': vid_url, 'type':'video/mp4'}, {'url': vid_url, 'type':'video/mp4'}, {'url': vid_url, 'type':'video/mp4'}] 39 | return render_template('traj_template.html', traj_name=str(traj_id), videos=video_list, video_names=name_list, attributes=attr_list) 40 | 41 | 42 | @app.route('/traj//cam.mp4') 43 | def get_mp4(traj_id, cam_id): 44 | if traj_id != 0 and traj_id != 10: # page not found if traj id not valid 45 | abort(404) 46 | 47 | if not 0 <= cam_id < 5: # page not found if camera id is invalid 48 | abort(404) 49 | 50 | return send_file( 51 | io.BytesIO(vid), 52 | mimetype='video/mp4', 53 | as_attachment=True, 54 | attachment_filename='cam{}.mp4'.format(cam_id)) 55 | 56 | 57 | @app.after_request 58 | def add_header(r): 59 | """ 60 | Source: https://stackoverflow.com/questions/34066804/disabling-caching-in-flask 61 | Add headers to both force latest IE rendering engine or Chrome Frame, 62 | and also to cache the rendered page for 10 minutes. 63 | """ 64 | r.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" 65 | r.headers["Pragma"] = "no-cache" 66 | r.headers["Expires"] = "0" 67 | r.headers['Cache-Control'] = 'public, max-age=0' 68 | return r 69 | 70 | 71 | if __name__ == '__main__': 72 | # disable caching trick 2 (same source as above) 73 | app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 74 | app.run() 75 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='RoboNet', 6 | version='0.1.0', 7 | packages=['robonet'], 8 | ) 9 | --------------------------------------------------------------------------------
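A minimal usage sketch tying the pieces above together: the experiment YAMLs reference environment variables such as ${DATA_DIR} (resolved by the config parsers in robonet/yaml_util.py), and training is launched by pointing scripts/train_model.py at one of the config files. The data path below is a placeholder for wherever the RoboNet hdf5 files were downloaded, and launch_example.py is a hypothetical helper, not a file in the repository.

# launch_example.py -- hypothetical launcher; equivalent to invoking scripts/train_model.py from a shell
import os
import subprocess

# sawyer_grid_search.yaml reads ${DATA_DIR}/hdf5, so DATA_DIR must be set before the config is parsed
os.environ["DATA_DIR"] = os.path.expanduser("~/robonet_data")   # placeholder path to the downloaded dataset

subprocess.run(
    ["python", "scripts/train_model.py",
     "robonet_experiments/gpu/sawyer_grid_search.yaml",
     "--n_gpus", "1",
     "--name", "sawyer_fraction_search"],
    check=True)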