├── .flake8 ├── .gitattributes ├── .gitignore ├── .pre-commit-config.yaml ├── README.md ├── ae.pt ├── depth_vae.pt ├── docker └── Dockerfile ├── rgbd_vae.pt ├── src ├── cfg │ ├── .base.py.swn │ ├── base.py │ ├── cartpole.py │ ├── graph.py │ ├── graph_attention.py │ ├── graph_ddppo.py │ ├── graph_debug.py │ ├── human.py │ ├── memory.py │ ├── nav.py │ ├── objectnav_mp3d_train.yaml │ ├── objectnav_mp3d_train_vae.yaml │ ├── objectnav_mp3d_train_val.yaml │ ├── objectnav_mp3d_train_val_mini.yaml │ ├── objectnav_mp3d_val_vae.yaml │ ├── objectnav_mp3d_val_vae_d.yaml │ ├── objectnav_mp3d_val_vae_rgbd.yaml │ ├── pointnav_gibson_train_vae.yaml │ ├── pointnav_gibson_val_vae.yaml │ ├── recall_base.py │ ├── repeat.py │ ├── repeat_100.py │ ├── sparse_nav.py │ ├── spatial_sweep.py │ ├── train.py │ ├── train_ae.py │ ├── train_vae.py │ ├── train_vae_d.py │ ├── train_vae_debug.py │ ├── train_vae_rgbd.py │ └── tune.py ├── clean_results.sh ├── custom_metrics.py ├── graphenv.py ├── memory_env.py ├── models │ ├── .gnn.py.swo │ ├── ae.py │ ├── depth_vae.py │ ├── edge_selectors │ │ ├── bernoulli.py │ │ ├── dense.py │ │ ├── distance.py │ │ ├── self_edge.py │ │ └── temporal.py │ ├── gcm.py │ ├── gnn.py │ ├── ray_ae.py │ ├── ray_dnc.py │ ├── ray_graph.py │ ├── ray_vae.py │ ├── ray_vae_d.py │ ├── ray_vae_rgbd.py │ ├── rgbd_vae.py │ ├── sparse_edge_selectors │ │ ├── distance.py │ │ └── temporal.py │ ├── sparse_gcm.py │ ├── sparse_ray_graph.py │ ├── test_gcm.py │ ├── test_ray_graph.py │ ├── test_sparse_gam.py │ ├── vae.py │ └── visualize_grad.py ├── plot │ ├── cartpole.py │ ├── csv_plot.py │ ├── memory.py │ ├── nav.py │ └── nav_sweep.py ├── policies │ └── random_policy.py ├── preprocessors │ ├── autoencoder │ │ ├── dataset_collector.py │ │ ├── ppae.py │ │ ├── ppd_vae.py │ │ ├── pprgbd_vae.py │ │ ├── train_vae.py │ │ └── vae.py │ ├── compass_components.py │ ├── compass_fix.py │ ├── ghost_rgb.py │ ├── nn_semantic.py │ ├── noop.py │ ├── objectgoal.py │ ├── quantized_depth.py │ └── semantic │ │ ├── continuous_onehot.py │ │ ├── onehot.py │ │ └── quantized_mesh.py ├── profile_mvs.py ├── rayenv.py ├── recall_env.py ├── rewards │ ├── basic.py │ ├── collision.py │ ├── explore.py │ └── path.py ├── semantic_colors.py ├── server │ ├── render.py │ └── templates │ │ └── index.html ├── sizes.txt ├── start.py ├── util.py └── val_to_video.py └── vae.pt /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E402, E501, W503, F403, F401 3 | max-line-length = 79 4 | max-complexity = 18 5 | select = B,C,E,F,W,T4,B9 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pt filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | *.pyc 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 20.8b1 4 | hooks: 5 | - id: black 6 | language_version: python3.8.5 7 | - repo: https://github.com/pycqa/flake8 #https://github.com/pre-commit/pre-commit-hooks 8 | rev: 3.9.0 9 | hooks: 10 | - id: flake8 11 | - repo: https://github.com/pre-commit/mirrors-mypy 12 | rev: 'v0.812' # 
Use the sha / tag you want to point at
13 |   hooks:
14 |   - id: mypy
15 |     args: [--no-strict-optional, --ignore-missing-imports]
16 | 
17 | 
18 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Graph Convolutional Memory for Reinforcement Learning
 2 | This is the code used for the paper [Graph Convolutional Memory for Reinforcement Learning](https://arxiv.org/abs/2106.14117). This repo is intended to aid in the reproducibility of the paper. If you are interested in using graph convolutional memory in your own project, I suggest you use my `graph-conv-memory` library available [here](https://github.com/smorad/graph-conv-memory).
 3 | 
 4 | ## Description
 5 | Graph convolutional memory (GCM) is graph-structured memory that may be applied to reinforcement learning to solve POMDPs, replacing LSTMs or attention mechanisms.
 6 | 
 7 | ## Quickstart
 8 | If you are interested in applying GCM to your problem, you must install the dependencies `torch` and `torch_geometric`. If you are using `ray rllib` to train, use the `RayObsGraph` model as follows (running from the project root directory):
 9 | 
10 | ```
11 | import torch
12 | import torch_geometric
13 | 
14 | from ray import tune
15 | from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole
16 | 
17 | from models.ray_graph import RayObsGraph
18 | from models.edge_selectors.temporal import TemporalBackedge
19 | 
20 | our_gnn = torch_geometric.nn.Sequential(
21 |     "x, adj, weights, B, N",
22 |     [
23 |         (torch_geometric.nn.DenseGraphConv(32, 32), "x, adj -> x"),
24 |         (torch.nn.Tanh()),
25 |         (torch_geometric.nn.DenseGraphConv(32, 32), "x, adj -> x"),
26 |         (torch.nn.Tanh()),
27 |     ],
28 | )
29 | ray_cfg = {
30 |     "env": StatelessCartPole,  # Replace this with your desired env
31 |     "framework": "torch",
32 |     "model": {
33 |         "custom_model": RayObsGraph,
34 |         "custom_model_config": {
35 |             "gnn_input_size": 32,
36 |             "gnn_output_size": 32,
37 |             "gnn": our_gnn,
38 |             "edge_selectors": TemporalBackedge([1])
39 |         }
40 |     }
41 | }
42 | tune.run("PPO", config=ray_cfg)
43 | ```
44 | 
45 | If you are not using `ray rllib`, use the model like so:
46 | 
47 | ```
48 | import torch
49 | import torch_geometric
50 | from models.gcm import DenseGCM
51 | from models.edge_selectors.temporal import TemporalBackedge
52 | 
53 | our_gnn = torch_geometric.nn.Sequential(
54 |     "x, adj, weights, B, N",
55 |     [
56 |         (torch_geometric.nn.DenseGraphConv(YOUR_OBS_SIZE, 32), "x, adj -> x"),
57 |         (torch.nn.Tanh()),
58 |         (torch_geometric.nn.DenseGraphConv(32, 32), "x, adj -> x"),
59 |         (torch.nn.Tanh()),
60 |     ],
61 | )
62 | gcm = DenseGCM(our_gnn, edge_selectors=TemporalBackedge([1]), graph_size=128)
63 | 
64 | # Create initial state
65 | edges = torch.zeros(
66 |     (1, 128, 128), dtype=torch.float
67 | )
68 | nodes = torch.zeros((1, 128, YOUR_OBS_SIZE))
69 | weights = torch.zeros(
70 |     (1, 128, 128), dtype=torch.float
71 | )
72 | num_nodes = torch.tensor([0], dtype=torch.long)
73 | m_t = [nodes, edges, weights, num_nodes]
74 | 
75 | for t in train_timestep:
76 |     state, m_t = gcm(obs[t], m_t)
77 |     # Do what you will with the state
78 |     # likely you want to use it to get an action/value estimate
79 |     action_logits = logits(state)
80 |     state_value = vf(state)
81 | ```
82 | See `src/models/edge_selectors` for different kinds of priors.
83 | 
84 | 
85 | ## Full Install
86 | Getting CUDA/python/conda/habitat/ray working together is a project in itself.
We run everything in Docker to make our setup reproducible anywhere. The full install sets up all of the code used for our various experiments. You only need to do this if you are rerunning our experiments.
87 | 
88 | ### Host Setup
89 | We have tested everything using `Docker version 20.10.2, build 2291f61`, `NVidia Driver Version: 460.27.04`, and `CUDA Version: 11.2`, so if you run into issues, try using these versions on your host. After installing CUDA, follow the [NVidia guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) to install docker and nvidia-docker2.
90 | 
91 | Unfortunately, CUDA is required for Habitat, so you must follow this step if you want to run the navigation experiment. Once the host is set up, continue with the docker container creation. We also do not store the scene models in the docker container, as they are huge and we are not legally permitted to package them in the container. Download the `habitat` task of the Matterport3D dataset as shown [here](https://github.com/facebookresearch/habitat-lab#data). Then extract it, and use the extracted directory as `SCENE_DATASET_PATH` in the container setup.
92 | 
93 | ### Docker Container Setup
94 | ```
95 | #!/bin/bash
96 | 
97 | cd vnav
98 | # Build the image -- this takes a while, get yourself a coffee
99 | docker build docker -t ray_habitat:latest
100 | 
101 | # Launch a container
102 | # Make sure you fill out SCENE_DATASET_PATH to where you've
103 | # stored the mp3d scene_datasets (navigation problem only)
104 | # We cannot share these, you need to sign a waiver with mp3d first
105 | export SCENE_DATASET_PATH=/path_to/scene_datasets
106 | # port description:
107 | # 8265 ray
108 | # 5000 navigation renders
109 | # 5050 visdom
110 | # 6006 tensorboard
111 | docker run \
112 |     --gpus all \
113 |     --shm-size 32g \
114 |     -p 8299:8265 \
115 |     -p 5000:5000 \
116 |     -p 5050:5050 \
117 |     -p 6099:6006 \
118 |     -v ${SCENE_DATASET_PATH}:/root/scene_datasets \
119 |     -ti ray_habitat:latest bash
120 | 
121 | # Now we should be in the container
122 | ```
123 | 
124 | ## Execution
125 | Once in the container, make sure the repo is up to date, then run!
126 | 
127 | ```
128 | #!/bin/bash
129 | 
130 | # Ensure CUDA is working as expected
131 | nvidia-smi
132 | # Make sure to update the source repo in the container
133 | cd /root/vnav
134 | git pull
135 | # Run!
136 | python3 src/start.py src/cfg/memory.py
137 | ```
138 | 
139 | ### Rerunning Experiments
140 | You can rerun our experiments with the following commands:
141 | ```
142 | python3 src/start.py src/cfg/cartpole.py  # Cartpole experiment
143 | python3 src/start.py src/cfg/memory.py    # Memory experiment
144 | python3 src/start.py src/cfg/nav.py       # Navigation experiment
145 | ```
146 | 
147 | These will populate `$HOME/ray_results/` with tensorboard data as well as CSV and JSON files containing the training info.
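If you want a quick look at those CSV results outside of tensorboard, a minimal sketch is shown below. It is not part of this repo's tooling (the repo's own plotting scripts live in `src/plot/`): it assumes Ray Tune's usual per-trial `progress.csv` files with `timesteps_total` and `episode_reward_mean` columns, a two-level `~/ray_results/<experiment>/<trial>/` layout, and that `pandas` and `matplotlib` are installed, so adjust the glob and column names to your own runs.

```
# Minimal sketch: plot mean episode reward for every trial under ~/ray_results.
# Assumes Ray Tune's usual progress.csv columns; adjust the glob for your runs.
import glob
import os

import matplotlib.pyplot as plt
import pandas as pd

results_root = os.path.expanduser("~/ray_results")
for csv_path in glob.glob(os.path.join(results_root, "*", "*", "progress.csv")):
    df = pd.read_csv(csv_path)
    if "episode_reward_mean" not in df.columns:
        continue  # this trial never reported the metric
    x = df["timesteps_total"] if "timesteps_total" in df.columns else df.index
    trial_name = os.path.basename(os.path.dirname(csv_path))
    plt.plot(x, df["episode_reward_mean"], label=trial_name)

plt.xlabel("environment steps")
plt.ylabel("mean episode reward")
plt.legend(fontsize="x-small")
plt.show()
```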
148 | -------------------------------------------------------------------------------- /ae.pt: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:48535ba275b1836ebc8e3b53b19f96fe6a314bff0edc0308d7f048758828b86c 3 | size 200429255 4 | -------------------------------------------------------------------------------- /depth_vae.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smorad/graph-conv-memory-paper/4401b87703a631e85347efa665d822e31a41b1c6/depth_vae.pt -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cudagl:11.1-devel-ubuntu20.04 2 | ARG DEBIAN_FRONTEND=noninteractive 3 | RUN apt update && apt install -y git python3 python3-pip vim 4 | RUN git clone https://github.com/facebookresearch/habitat-sim /root/habitat-sim 5 | RUN git clone https://github.com/facebookresearch/habitat-lab /root/habitat-lab 6 | 7 | # Build habitat-sim 8 | RUN apt install -y --no-install-recommends \ 9 | libjpeg-dev libglm-dev libgl1-mesa-glx libegl1-mesa-dev mesa-utils xorg-dev freeglut3-dev libbullet-dev cmake ninja-build g++ 10 | # Make image smaller by not caching downloaded pip pkgs 11 | ARG PIP_NO_CACHE_DIR=1 12 | RUN cd /root/habitat-sim && pip3 install -r requirements.txt && python3 setup.py install --headless --with-cuda --bullet 13 | 14 | # Install pytorch for example, and ensure sim works with all our required pkgs 15 | ARG TORCH=1.8.1 16 | ARG TORCHVISION=0.9.1 17 | ARG CUDA=cu111 18 | # Pytorch and torch_geometric w/ deps 19 | RUN pip3 install torch==${TORCH}+${CUDA} \ 20 | torchvision==${TORCHVISION}+${CUDA} \ 21 | -f https://download.pytorch.org/whl/torch_stable.html 22 | RUN pip3 install torch-scatter torch-sparse torch-cluster torch-spline-conv \ 23 | -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html \ 24 | torch_geometric 25 | # pytorch_geometric can be a bit buggy during install 26 | RUN python3 -c "import torch; import torch_geometric" 27 | #RUN cd /root/habitat-sim && python3 examples/example.py 28 | 29 | # Build habitat-lab and install habitat-baselines deps 30 | RUN cd /root/habitat-lab && pip3 install -e . 
\ 31 | && pip3 install lmdb ifcfg webdataset==0.1.40 32 | 33 | # Ray rllib 34 | RUN pip3 install gputil https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl dm-tree lz4 hyperopt tensorboardX 35 | 36 | # Detectron 37 | RUN pip3 install 'git+https://github.com/facebookresearch/detectron2.git' 38 | 39 | # Install our deps; webserver for viewing observation and faster PIL 40 | RUN pip3 uninstall -y pillow && pip3 install flask pillow-simd Flask-SocketIO visdom dnc torchviz 41 | 42 | ## Copy token for gitlab clone 43 | ## Disabled for release, don't worry this token is expired :P 44 | # RUN echo "6RJQmn5AezFsn7Nsqu7N" > /root/token && git clone https://oauth2:$(cat /root/token)@gitlab.developers.cam.ac.uk/cst/prorok-lab/vnav /root/vnav 45 | ## Dev tools 46 | #RUN pip3 install pre-commit && cd /root/vnav && pre-commit && pre-commit autoupdate && pre-commit install 47 | 48 | # Download test and object-goal navigation episodes 49 | RUN apt install -y unzip curl && \ 50 | # test 51 | curl http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip --output habitat-test-scenes.zip \ 52 | && unzip habitat-test-scenes.zip -d /root/habitat-lab \ 53 | # mp3d objectnav 54 | && curl https://dl.fbaipublicfiles.com/habitat/data/datasets/objectnav/m3d/v1/objectnav_mp3d_v1.zip \ 55 | --output objectnav_mp3d_v1.zip \ 56 | && mkdir -p /root/habitat-lab/data/datasets/objectnav/mp3d/v1/ \ 57 | && unzip objectnav_mp3d_v1.zip -d /root/habitat-lab/data/datasets/objectnav/mp3d/v1/ \ 58 | && rm objectnav_mp3d_v1.zip \ 59 | # mp3d pointnav 60 | && curl https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/mp3d/v1/pointnav_mp3d_v1.zip \ 61 | --output pointnav_mp3d_v1.zip \ 62 | && mkdir -p /root/habitat-lab/data/datasets/pointnav/mp3d/v1/ \ 63 | && unzip pointnav_mp3d_v1.zip -d /root/habitat-lab/data/datasets/pointnav/mp3d/v1/ \ 64 | && rm pointnav_mp3d_v1.zip \ 65 | # gibson pointnav 66 | && curl https://dl.fbaipublicfiles.com/habitat/data/datasets/pointnav/gibson/v1/pointnav_gibson_v1.zip \ 67 | --output pointnav_gibson_v1.zip \ 68 | && mkdir -p /root/habitat-lab/data/datasets/pointnav/gibson/v1/ \ 69 | && unzip pointnav_gibson_v1.zip -d /root/habitat-lab/data/datasets/pointnav/gibson/v1/ \ 70 | && rm pointnav_gibson_v1.zip 71 | 72 | # Link matterport and gibson models to mounted docker volume 73 | RUN ln -s /root/scene_datasets/mp3d /root/habitat-lab/data/scene_datasets \ 74 | && ln -s /root/scene_datasets/gibson /root/habitat-lab/data/scene_datasets 75 | -------------------------------------------------------------------------------- /rgbd_vae.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smorad/graph-conv-memory-paper/4401b87703a631e85347efa665d822e31a41b1c6/rgbd_vae.pt -------------------------------------------------------------------------------- /src/cfg/.base.py.swn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smorad/graph-conv-memory-paper/4401b87703a631e85347efa665d822e31a41b1c6/src/cfg/.base.py.swn -------------------------------------------------------------------------------- /src/cfg/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ray.rllib.agents.impala import ImpalaTrainer 4 | from ray.rllib.agents.a3c import A3CTrainer 5 | from ray.rllib.agents.ppo import PPOTrainer 6 | from ray.tune import register_env 7 | 8 | from 
preprocessors.compass_fix import CompassFix 9 | from preprocessors.compass_components import CompassComponents 10 | from preprocessors.semantic.quantized_mesh import QuantizedSemanticMask 11 | from preprocessors.quantized_depth import QuantizedDepth 12 | from preprocessors.ghost_rgb import GhostRGB 13 | from preprocessors.autoencoder.ppae import PPAE 14 | from preprocessors.autoencoder.pprgbd_vae import PPRGBDVAE 15 | from preprocessors.autoencoder.ppd_vae import PPDepthVAE 16 | 17 | from rewards.basic import BasicReward 18 | from rewards.path import PathReward 19 | from rewards.explore import ExplorationReward 20 | from rewards.collision import CollisionReward 21 | 22 | from rayenv import NavEnv 23 | from custom_metrics import CustomMetrics, EvalMetrics 24 | 25 | 26 | register_env(NavEnv.__name__, NavEnv) 27 | cfg_dir = os.path.abspath(os.path.dirname(__file__)) 28 | 29 | # These are specific to our habitat-based environment 30 | env_cfg = { 31 | # Path to the habitat yaml config, that specifies sensor info, 32 | # which maps to use, etc. 33 | "hab_cfg_path": f"{cfg_dir}/objectnav_mp3d_train.yaml", 34 | # Habitat preprocessors change the observation space in the simulator 35 | # These are loaded and run in-order 36 | "preprocessors": { 37 | "compass": CompassFix, 38 | "compass_comp": CompassComponents, 39 | # "semantic": QuantizedSemanticMask, 40 | # "depth": QuantizedDepth, 41 | # "rgb_visualization": GhostRGB, 42 | # "semantic_and_depth_autoencoder": PPAE, 43 | "depth_autoencoder": PPDepthVAE, 44 | }, 45 | # Multiple reward functions may be implemented at once, 46 | # they are summed together 47 | # "rewards": {"stop_goal": BasicReward, "goal_path": PathReward}, 48 | "rewards": {"exploration": ExplorationReward}, 49 | } 50 | 51 | # Change the path for our validation set 52 | val_env_cfg = { 53 | **env_cfg, # type: ignore 54 | "hab_cfg_path": f"{cfg_dir}/objectnav_mp3d_train_val_mini.yaml", 55 | "callbacks": EvalMetrics, 56 | } 57 | 58 | 59 | CFG = { 60 | # Our specific trainer type 61 | "ray_trainer": ImpalaTrainer, 62 | # Ray specific config sent to ray.tune or ray.rllib trainer 63 | "ray": { 64 | "env_config": env_cfg, 65 | # These are rllib/ray specific 66 | "framework": "torch", 67 | "model": {"framestack": False}, 68 | "num_workers": 1, 69 | # Total GPU usage: num_gpus (trainer proc) + num_gpus_per_worker (workers) 70 | "num_gpus_per_worker": 0.15, 71 | "num_cpus_per_worker": 2, 72 | # this corresponds to the number of learner GPUs used, 73 | # not the total used for the environments/rollouts 74 | "num_gpus": 0.15, 75 | # Size of batches (in timesteps) placed in the learner queue 76 | "rollout_fragment_length": 256, 77 | # Total number of timesteps to train per batch 78 | "train_batch_size": 1024, 79 | "lr": 0.0005, 80 | "entropy_coeff": 0.001, 81 | "env": NavEnv.__name__, 82 | "callbacks": CustomMetrics, 83 | "replay_proportion": 1.0, 84 | "replay_buffer_num_slots": 32, 85 | # "placement_strategy": "SPREAD", 86 | # For evaluation 87 | # How many epochs/train iters 88 | # "evaluation_interval": 10, 89 | # "evaluation_num_episodes": 10, 90 | # "evaluation_config": val_env_cfg, 91 | # "evaluation_num_workers": 1, # Must be >0 to get OpenGL 92 | # "evaluation_parallel_to_training": True, 93 | }, 94 | "tune": { 95 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 96 | }, 97 | # Env to be loaded when mode == human 98 | "human_env": NavEnv, 99 | } 100 | -------------------------------------------------------------------------------- /src/cfg/cartpole.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Any 3 | 4 | from ray.rllib.agents.impala import ImpalaTrainer 5 | from ray.rllib.agents.a3c import A3CTrainer, A2CTrainer 6 | from ray.rllib.agents.ppo import PPOTrainer 7 | from ray.tune import register_env, grid_search 8 | 9 | from rewards.basic import BasicReward 10 | from rewards.path import PathReward 11 | 12 | from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole 13 | 14 | from models.ray_graph import RayObsGraph 15 | from models.ray_dnc import DNCMemory 16 | from models.edge_selectors.temporal import TemporalBackedge 17 | 18 | from copy import deepcopy 19 | import torch_geometric 20 | import torch 21 | 22 | 23 | register_env(StatelessCartPole.__name__, StatelessCartPole) 24 | cfg_dir = os.path.abspath(os.path.dirname(__file__)) 25 | 26 | hiddens = [32, 16, 8] 27 | seq_len = 20 28 | horizon = 200 29 | gsize = 10 30 | 31 | 32 | no_mem = grid_search( 33 | [ 34 | { 35 | "fcnet_hiddens": [hidden, hidden], 36 | "fcnet_activation": "tanh", 37 | } 38 | for hidden in hiddens 39 | ] 40 | ) 41 | 42 | rnn_model = grid_search( 43 | [ 44 | { 45 | "fcnet_hiddens": [hidden, hidden], 46 | "fcnet_activation": "tanh", 47 | "use_lstm": True, 48 | "max_seq_len": seq_len, 49 | "lstm_cell_size": hidden, 50 | # "lstm_use_prev_action": True, 51 | } 52 | for hidden in hiddens 53 | ] 54 | ) # type: ignore 55 | 56 | dnc_model = grid_search( 57 | [ 58 | { 59 | "custom_model": DNCMemory, 60 | "custom_model_config": { 61 | "hidden_size": hidden, 62 | "nr_cells": gsize, 63 | "cell_size": hidden, 64 | "preprocessor_input_size": hidden, 65 | "preprocessor_output_size": hidden, 66 | "preprocessor": torch.nn.Sequential( 67 | torch.nn.Linear(hidden, hidden), 68 | torch.nn.Tanh(), 69 | torch.nn.Linear(hidden, hidden), 70 | torch.nn.Tanh(), 71 | ), 72 | }, 73 | "max_seq_len": seq_len, 74 | } 75 | for hidden in hiddens 76 | ] 77 | ) 78 | 79 | attn_model = grid_search( 80 | [ 81 | { 82 | "fcnet_hiddens": [hidden, hidden], 83 | "fcnet_activation": "tanh", 84 | "use_attention": True, 85 | "attention_num_transformer_units": 1, 86 | "attention_dim": hidden, 87 | "attention_num_heads": 1, 88 | "attention_head_dim": hidden, 89 | "attention_position_wise_mlp_dim": hidden, 90 | "attention_memory_inference": seq_len, 91 | "attention_memory_training": seq_len, 92 | # "attention_use_n_prev_actions": 1, 93 | } 94 | for hidden in hiddens 95 | ] 96 | ) 97 | 98 | 99 | graph_models = [] 100 | for hidden in hiddens: 101 | dgc = torch_geometric.nn.Sequential( 102 | "x, adj, weights, B, N", 103 | [ 104 | # Mean and sum aggregation perform roughly the same 105 | # Preprocessor with 1 layer did not help 106 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 107 | (torch.nn.Tanh()), 108 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 109 | (torch.nn.Tanh()), 110 | ], 111 | ) 112 | dgc.name = "GraphConv_2h" 113 | temporal_model = { 114 | "custom_model": RayObsGraph, 115 | "custom_model_config": { 116 | "graph_size": gsize, 117 | "gnn_input_size": hidden, 118 | "gnn_output_size": hidden, 119 | "gnn": dgc, 120 | # 2 edges outperforms 1 when actions are known 121 | # 1 outperforms 2 when actions are not known 122 | "edge_selectors": TemporalBackedge([1, 2]), 123 | # "use_prev_action": True, 124 | }, 125 | "max_seq_len": seq_len, 126 | } 127 | graph_models.append(temporal_model) 128 | 129 | models = [ 130 | *graph_models, 131 | rnn_model, 132 | attn_model, 133 | no_mem, 
134 | dnc_model, 135 | ] 136 | 137 | 138 | CFG = { 139 | # Our specific trainer type 140 | "ray_trainer": PPOTrainer, 141 | # Ray specific config sent to ray.tune or ray.rllib trainer 142 | "ray": { 143 | # These are rllib/ray specific 144 | "env_config": {}, 145 | "framework": "torch", 146 | "model": grid_search(models), 147 | "num_workers": 2, 148 | "num_cpus_per_worker": 2, 149 | "num_gpus": 0.2, 150 | "env": StatelessCartPole, 151 | "vf_loss_coeff": 1e-5, 152 | "horizon": horizon, 153 | # "lr": 0.0005, 154 | # "train_batch_size": 2000, 155 | # "num_sgd_iter": 15, 156 | # "sgd_minibatch_size": 100, 157 | }, 158 | "tune": { 159 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 160 | "stop": {"info/num_steps_trained": 1e6}, 161 | "num_samples": 3, 162 | }, 163 | } 164 | -------------------------------------------------------------------------------- /src/cfg/graph.py: -------------------------------------------------------------------------------- 1 | from cfg import base 2 | from models.ray_graph import RayObsGraph 3 | from models.edge_selectors.temporal import TemporalBackedge 4 | from models.edge_selectors.bernoulli import BernoulliEdge 5 | 6 | import os 7 | 8 | 9 | CFG = base.CFG 10 | CFG["ray"]["model"]["custom_model"] = RayObsGraph 11 | CFG["ray"]["num_workers"] = 6 12 | CFG["ray"]["model"]["custom_model_config"] = { 13 | "graph_size": 32, 14 | "gcn_output_size": 256, 15 | "gcn_hidden_size": 256, 16 | "gcn_num_layers": 3, 17 | "edge_selectors": [TemporalBackedge], 18 | } 19 | # How many past states are used for training 20 | # this should likely be at least `graph_size` 21 | CFG["ray"]["model"]["max_seq_len"] = 32 22 | # this corresponds to the number of learner GPUs used, 23 | # not the total used for the environments/rollouts 24 | # Since this is the bottleneck, we let it use an entire GPU 25 | CFG["ray"]["num_gpus"] = 1 26 | 27 | # For rollout workers 28 | CFG["ray"]["num_gpus_per_worker"] = 0.2 29 | CFG["ray"]["num_cpus_per_worker"] = 4 30 | 31 | # At batch sizes of 1024 and 2048, GPU learn time is roughly the same per sample 32 | CFG["ray"]["train_batch_size"] = 2048 33 | CFG["ray"]["rollout_fragment_length"] = 128 34 | 35 | if os.environ.get("DEBUG", False): 36 | CFG["ray"]["num_workers"] = 0 37 | CFG["ray"]["train_batch_size"] = 64 38 | CFG["ray"]["rollout_fragment_length"] = 32 39 | CFG["ray"]["model"]["custom_model_config"]["edge_selectors"].append(BernoulliEdge) 40 | -------------------------------------------------------------------------------- /src/cfg/graph_attention.py: -------------------------------------------------------------------------------- 1 | from cfg import graph 2 | from torch_geometric.nn import GATConv 3 | 4 | 5 | CFG = graph.CFG 6 | CFG["ray"]["model"]["custom_model_config"]["gcn_conv_type"] = GATConv 7 | CFG["ray"]["model"]["custom_model_config"]["gcn_num_attn_heads"] = 8 8 | CFG["ray"]["num_workers"] = 6 9 | CFG["ray"]["num_gpus_per_worker"] = 0.3 10 | -------------------------------------------------------------------------------- /src/cfg/graph_ddppo.py: -------------------------------------------------------------------------------- 1 | from cfg import base 2 | from models.ray_graph import RayObsGraph 3 | 4 | from ray.rllib.agents.ppo.ddppo import DDPPOTrainer 5 | 6 | CFG = base.CFG 7 | CFG["ray_trainer"] = DDPPOTrainer 8 | CFG["ray"]["model"]["custom_model"] = RayObsGraph 9 | CFG["ray"]["model"]["custom_model_config"] = { 10 | "graph_size": 32, 11 | "gcn_output_size": 128, 12 | "gcn_hidden_size": 256, 13 | } 14 | 15 | # This must 
be zero for ddppo 16 | del CFG["ray"]["num_gpus"] 17 | del CFG["ray"]["train_batch_size"] 18 | 19 | # For rollout workers 20 | # Each env gets its own learner 21 | # but due to opengl/cuda/forking we can only 22 | # have one env per worker 23 | CFG["ray"]["num_workers"] = 24 24 | CFG["ray"]["num_envs_per_worker"] = 1 25 | CFG["ray"]["num_gpus_per_worker"] = 0.16 26 | CFG["ray"]["num_cpus_per_worker"] = 2 27 | 28 | CFG["ray"]["rollout_fragment_length"] = 100 29 | -------------------------------------------------------------------------------- /src/cfg/graph_debug.py: -------------------------------------------------------------------------------- 1 | from cfg import graph 2 | from torch_geometric.nn import GATConv 3 | 4 | # Run with: 5 | # RAY_PDB=1 python3 start.py cfg/graph.debug.py --local 6 | # see below why 7 | 8 | # Likely you want to run with RAY_PDB=1 start cfg/graph_debug.py 9 | # for postmortem debugging 10 | 11 | CFG = graph.CFG 12 | # workers == 0 only works with --local 13 | # use workers == 1 if you want to avoid local mode 14 | # this is due to opengl/cuda contexts used by env 15 | CFG["ray"]["num_workers"] = 0 16 | CFG["tune"]["stop"] = {"training_iteration": 1} 17 | CFG["ray"]["train_batch_size"] = 8 18 | CFG["ray"]["rollout_fragment_length"] = 8 19 | 20 | CFG["ray"]["model"]["custom_model_config"]["gcn_conv_type"] = GATConv 21 | CFG["ray"]["model"]["custom_model_config"]["gcn_num_attn_heads"] = 8 22 | -------------------------------------------------------------------------------- /src/cfg/human.py: -------------------------------------------------------------------------------- 1 | from cfg import base 2 | from preprocessors.autoencoder.vae import PPVAE 3 | 4 | CFG = base.CFG 5 | 6 | 7 | CFG["ray"]["env_config"]["preprocessors"]["semantic_and_depth_autoencoder"] = PPVAE 8 | -------------------------------------------------------------------------------- /src/cfg/memory.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Any 3 | 4 | from ray.rllib.agents.impala import ImpalaTrainer 5 | from ray.rllib.agents.ppo import PPOTrainer 6 | from ray.rllib.agents.a3c import A3CTrainer 7 | from ray.tune import register_env, grid_search 8 | 9 | from rewards.basic import BasicReward 10 | from rewards.path import PathReward 11 | 12 | from memory_env import MemoryEnv 13 | from models.ray_graph import RayObsGraph 14 | from models.ray_dnc import DNCMemory 15 | from models.edge_selectors.temporal import TemporalBackedge 16 | from models.edge_selectors.distance import SpatialEdge 17 | 18 | from copy import deepcopy 19 | import torch_geometric 20 | import torch 21 | 22 | 23 | register_env(MemoryEnv.__name__, MemoryEnv) 24 | cfg_dir = os.path.abspath(os.path.dirname(__file__)) 25 | 26 | # exp 1 27 | """ 28 | hiddens = [32] 29 | seq_len = 50 30 | num_cards = 8 31 | num_matches = 2 32 | num_unique_cards = num_cards // num_matches 33 | """ 34 | """ 35 | # exp 2 36 | # optimal_solution = 1.75 * 10 = 18 37 | hiddens = [32] 38 | seq_len = 75 39 | num_cards = 10 40 | num_matches = 2 41 | num_unique_cards = num_cards // num_matches 42 | """ 43 | # exp 3 44 | hiddens = [32] 45 | seq_len = 100 46 | num_cards = 12 47 | num_matches = 2 48 | num_unique_cards = num_cards // num_matches 49 | 50 | edge_selector = torch_geometric.nn.Sequential( 51 | "x, adj, weights, N, B", 52 | [ 53 | # view_flipped discrete 54 | # obs slices: 55 | # ({'card': 0, 'flipped_cards': 5, 'flipped_pos': 15, 'pointer_pos': 31}, 56 | # {'card': 5, 
'flipped_cards': 15, 'flipped_pos': 31, 'pointer_pos': 39}) 57 | ( 58 | SpatialEdge( 59 | 1e-3, 60 | # Match current face-up card 61 | slice(2 * (num_unique_cards + 1), 3 * (num_unique_cards + 1)), 62 | # Against previous memories of flipped cards 63 | slice(0, num_unique_cards + 1), 64 | ), 65 | "x, adj, weights, N, B -> adj, weights", 66 | ), 67 | (TemporalBackedge([1, 2]), "x, adj, weights, N, B -> adj, weights"), 68 | ], 69 | ) 70 | gsize = seq_len + 2 71 | 72 | 73 | no_mem = [ 74 | { 75 | "fcnet_hiddens": [hidden, hidden], 76 | "fcnet_activation": "tanh", 77 | } 78 | for hidden in hiddens 79 | ] 80 | 81 | rnn_model = [ 82 | { 83 | "fcnet_hiddens": [hidden, hidden], 84 | "fcnet_activation": "tanh", 85 | "use_lstm": True, 86 | "max_seq_len": seq_len, 87 | "lstm_cell_size": hidden, 88 | "lstm_use_prev_action": True, 89 | } 90 | for hidden in hiddens 91 | ] 92 | 93 | dnc_model = [ 94 | { 95 | "custom_model": DNCMemory, 96 | "custom_model_config": { 97 | "hidden_size": hidden, 98 | "nr_cells": hidden, 99 | "cell_size": hidden, 100 | "preprocessor_input_size": hidden, 101 | "preprocessor_output_size": hidden, 102 | "preprocessor": torch.nn.Sequential( 103 | torch.nn.Linear(hidden, hidden), 104 | torch.nn.Tanh(), 105 | torch.nn.Linear(hidden, hidden), 106 | torch.nn.Tanh(), 107 | ), 108 | "use_prev_action": True, 109 | }, 110 | "max_seq_len": seq_len, 111 | } 112 | for hidden in hiddens 113 | ] 114 | 115 | attn_model = [ 116 | { 117 | "fcnet_hiddens": [hidden, hidden], 118 | "fcnet_activation": "tanh", 119 | "use_attention": True, 120 | "attention_num_transformer_units": 1, 121 | "attention_dim": hidden, 122 | "attention_num_heads": 1, 123 | "attention_head_dim": hidden, 124 | "attention_position_wise_mlp_dim": hidden, 125 | "attention_memory_inference": seq_len, 126 | "attention_memory_training": seq_len, 127 | "attention_use_n_prev_actions": seq_len, 128 | } 129 | for hidden in hiddens 130 | ] 131 | 132 | 133 | graph_models = [] 134 | for hidden in hiddens: 135 | 136 | dgc = torch_geometric.nn.Sequential( 137 | "x, adj, weights, B, N", 138 | [ 139 | # Mean and sum aggregation perform roughly the same 140 | # Preprocessor with 1 layer did not help 141 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 142 | (torch.nn.Tanh()), 143 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 144 | (torch.nn.Tanh()), 145 | ], 146 | ) 147 | dgc.name = "GraphConv_2h" 148 | sgm_model = { 149 | "custom_model": RayObsGraph, 150 | "custom_model_config": { 151 | "graph_size": gsize, 152 | "gnn_input_size": hidden, 153 | "gnn_output_size": hidden, 154 | "gnn": dgc, 155 | "edge_selectors": edge_selector, 156 | "use_prev_action": True, 157 | }, 158 | "max_seq_len": seq_len, 159 | } 160 | graph_models.append(sgm_model) 161 | 162 | models = [ 163 | *graph_models, 164 | *attn_model, 165 | *rnn_model, 166 | *no_mem, 167 | *dnc_model, 168 | ] 169 | 170 | 171 | CFG = { 172 | # Our specific trainer type 173 | "ray_trainer": A3CTrainer, 174 | # Ray specific config sent to ray.tune or ray.rllib trainer 175 | "ray": { 176 | "env_config": { 177 | "num_matches": num_matches, 178 | "num_cards": num_cards, 179 | "mode": "view_flipped", 180 | "episode_length": seq_len - 1, 181 | "discrete": True, 182 | }, 183 | "framework": "torch", 184 | "model": grid_search(models), 185 | "num_envs_per_worker": 4, 186 | "num_cpus_per_worker": 2, 187 | "num_gpus": 0.2, 188 | "env": MemoryEnv.__name__, 189 | "entropy_coeff": 0.001, 190 | "vf_loss_coeff": 0.05, 191 | "lr": 0.0005, 192 | "train_batch_size": 
2000, 193 | }, 194 | "tune": { 195 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 196 | "stop": {"info/num_steps_trained": 10e6}, 197 | "num_samples": 3, 198 | }, 199 | } 200 | 201 | 202 | if os.environ.get("DEBUG", False): 203 | CFG["ray"]["model"] = graph_models[0] 204 | CFG["ray_trainer"] = ImpalaTrainer 205 | CFG["ray"]["num_workers"] = 0 206 | CFG["ray"]["num_gpus"] = 0.25 207 | CFG["ray"]["train_batch_size"] = 128 208 | CFG["ray"]["rollout_fragment_length"] = 64 209 | CFG["tune"] = { 210 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 211 | "stop": {"info/num_steps_trained": 128}, 212 | } 213 | -------------------------------------------------------------------------------- /src/cfg/nav.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from typing import Dict, Any, List 3 | from cfg import base 4 | from custom_metrics import EvalMetrics 5 | from models.ray_graph import RayObsGraph 6 | from models.ray_dnc import DNCMemory 7 | from models.edge_selectors.temporal import TemporalBackedge 8 | from models.edge_selectors.bernoulli import BernoulliEdge 9 | from models.edge_selectors.distance import CosineEdge, SpatialEdge 10 | from models.edge_selectors.dense import DenseEdge 11 | from ray.tune import grid_search 12 | import torch 13 | import torch_geometric 14 | 15 | import os 16 | 17 | seq_len = 128 18 | hiddens = [32, 16, 8] 19 | gsize = seq_len + 1 20 | act_dim = 3 21 | 22 | 23 | no_mem: List[Any] = [ 24 | { 25 | "fcnet_hiddens": [hidden, hidden], 26 | "fcnet_activation": "tanh", 27 | } 28 | for hidden in hiddens 29 | ] 30 | 31 | rnn_model = [ 32 | { 33 | "fcnet_hiddens": [hidden, hidden], 34 | "fcnet_activation": "tanh", 35 | "use_lstm": True, 36 | "max_seq_len": seq_len, 37 | "lstm_cell_size": hidden, 38 | "lstm_use_prev_action": True, 39 | } 40 | for hidden in hiddens 41 | ] 42 | 43 | attn_model = [ 44 | { 45 | "fcnet_hiddens": [hidden, hidden], 46 | "fcnet_activation": "tanh", 47 | "use_attention": True, 48 | "attention_num_transformer_units": 1, 49 | "attention_dim": hidden, 50 | "attention_num_heads": 1, 51 | "attention_head_dim": hidden, 52 | "attention_position_wise_mlp_dim": hidden, 53 | "attention_memory_inference": seq_len, 54 | "attention_memory_training": seq_len, 55 | "attention_use_n_prev_actions": 1, 56 | } 57 | for hidden in hiddens 58 | ] 59 | 60 | graph_models = [] 61 | for hidden in hiddens: 62 | dgc = torch_geometric.nn.Sequential( 63 | "x, adj, weights, B, N", 64 | [ 65 | ( 66 | torch_geometric.nn.DenseGraphConv(hidden, hidden, aggr="mean"), 67 | "x, adj -> x", 68 | ), 69 | (torch.nn.Tanh()), 70 | ( 71 | torch_geometric.nn.DenseGraphConv(hidden, hidden, aggr="mean"), 72 | "x, adj -> x", 73 | ), 74 | (torch.nn.Tanh()), 75 | ], 76 | ) 77 | dgc.name = "GraphConv_2h_mean" 78 | dgc.__class__.__repr__ = lambda self: self.name 79 | base_model = { 80 | "custom_model": RayObsGraph, 81 | "custom_model_config": { 82 | "graph_size": gsize, 83 | "gnn_input_size": hidden, 84 | "gnn_output_size": hidden, 85 | "gnn": dgc, 86 | "use_prev_action": True, 87 | }, 88 | "max_seq_len": seq_len, 89 | } 90 | # graph_models.append(base_model) 91 | 92 | temporal_model = deepcopy(base_model) 93 | temporal_model["custom_model_config"]["edge_selectors"] = TemporalBackedge() 94 | # graph_models.append(temporal_model) 95 | 96 | spatial_model = deepcopy(base_model) 97 | spatial_model["custom_model_config"]["edge_selectors"] = SpatialEdge( 98 | max_distance=0.25, a_pose_slice=slice(2, 4) 99 | 
) 100 | graph_models.append(spatial_model) 101 | 102 | vae_model = deepcopy(base_model) 103 | vae_model["custom_model_config"]["edge_selectors"] = SpatialEdge( 104 | max_distance=0.1, a_pose_slice=slice(3, 67) 105 | ) 106 | # graph_models.append(vae_model) 107 | 108 | 109 | dnc_model = [ 110 | { 111 | "custom_model": DNCMemory, 112 | "custom_model_config": { 113 | "hidden_size": hidden, 114 | "nr_cells": hidden, 115 | "cell_size": hidden, 116 | "preprocessor": torch.nn.Sequential( 117 | torch.nn.Linear(hidden, hidden), 118 | torch.nn.Tanh(), 119 | torch.nn.Linear(hidden, hidden), 120 | torch.nn.Tanh(), 121 | ), 122 | "preprocessor_input_size": hidden, 123 | "preprocessor_output_size": hidden, 124 | "use_prev_action": True, 125 | }, 126 | "max_seq_len": seq_len, 127 | } 128 | for hidden in hiddens 129 | ] 130 | 131 | models = [*graph_models, *attn_model, *rnn_model, *no_mem, *dnc_model] 132 | 133 | CFG = base.CFG 134 | CFG["ray"]["num_workers"] = 4 135 | CFG["ray"]["model"] = grid_search(models) 136 | 137 | # this corresponds to the number of learner GPUs used, 138 | # not the total used for the environments/rollouts 139 | # Since this is the bottleneck, we let it use an entire 1024 140 | CFG["ray"]["num_gpus"] = 0.2 141 | 142 | # For rollout workers 143 | CFG["ray"]["num_gpus_per_worker"] = 0.2 144 | CFG["ray"]["num_cpus_per_worker"] = 2 145 | 146 | # At batch sizes of 1024 and 2048, GPU learn time is roughly the same per sample 147 | CFG["ray"]["train_batch_size"] = 1024 148 | CFG["ray"]["rollout_fragment_length"] = seq_len 149 | 150 | CFG["tune"] = { 151 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 152 | "stop": {"info/num_steps_trained": 10e6}, 153 | "num_samples": 1, 154 | } 155 | 156 | if os.environ.get("DEBUG", False): 157 | CFG["ray"]["model"] = dnc_model[0] 158 | CFG["ray"]["num_workers"] = 1 159 | CFG["ray"]["num_gpus"] = 0.3 160 | CFG["ray"]["train_batch_size"] = 128 161 | CFG["ray"]["rollout_fragment_length"] = 64 162 | CFG["tune"] = { 163 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 164 | "stop": {"info/num_steps_trained": 128}, 165 | "num_samples": 3, 166 | } 167 | -------------------------------------------------------------------------------- /src/cfg/objectnav_mp3d_train.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 128 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['DEPTH_SENSOR'] #, 'SEMANTIC_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | SEMANTIC_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | POSITION: [0, 0.88, 0] 20 | RGB_SENSOR: 21 | WIDTH: 32 22 | HEIGHT: 32 23 | HFOV: 79 24 | POSITION: [0, 0.88, 0] 25 | DEPTH_SENSOR: 26 | WIDTH: 32 27 | HEIGHT: 32 28 | HFOV: 79 29 | MIN_DEPTH: 0.5 30 | MAX_DEPTH: 5.0 31 | POSITION: [0, 0.88, 0] 32 | TASK: 33 | TYPE: ObjectNav-v1 34 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] #, "LOOK_UP", "LOOK_DOWN"] 35 | SUCCESS_DISTANCE: 0.2 36 | 37 | SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR'] #['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] 38 | # GOAL_SENSOR_UUID: objectgoal 39 | 40 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL', 'SOFT_SPL'] 41 | 42 | DISTANCE_TO_GOAL: 43 | DISTANCE_TO: VIEW_POINTS 44 | SUCCESS: 45 | SUCCESS_DISTANCE: 0.2 46 | 47 | DATASET: 48 | TYPE: ObjectNav-v1 49 | SPLIT: val_mini 50 | DATA_PATH: 
"/root/habitat-lab/data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz" 51 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 52 | -------------------------------------------------------------------------------- /src/cfg/objectnav_mp3d_train_vae.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 128 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['DEPTH_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | DEPTH_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | MIN_DEPTH: 0.5 20 | MAX_DEPTH: 5.0 21 | POSITION: [0, 0.88, 0] 22 | TASK: 23 | TYPE: ObjectNav-v1 24 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] 25 | SUCCESS_DISTANCE: 0.2 26 | 27 | SENSORS: [] 28 | GOAL_SENSOR_UUID: pointnav 29 | 30 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS'] 31 | 32 | DISTANCE_TO_GOAL: 33 | DISTANCE_TO: VIEW_POINTS 34 | SUCCESS: 35 | SUCCESS_DISTANCE: 0.2 36 | 37 | DATASET: 38 | TYPE: ObjectNav-v1 39 | SPLIT: train 40 | DATA_PATH: "/root/habitat-lab/data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz" 41 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 42 | -------------------------------------------------------------------------------- /src/cfg/objectnav_mp3d_train_val.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 128 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] #, 'SEMANTIC_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | SEMANTIC_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | POSITION: [0, 0.88, 0] 20 | RGB_SENSOR: 21 | WIDTH: 32 22 | HEIGHT: 32 23 | HFOV: 79 24 | POSITION: [0, 0.88, 0] 25 | DEPTH_SENSOR: 26 | WIDTH: 32 27 | HEIGHT: 32 28 | HFOV: 79 29 | MIN_DEPTH: 0.5 30 | MAX_DEPTH: 5.0 31 | POSITION: [0, 0.88, 0] 32 | TASK: 33 | TYPE: ObjectNav-v1 34 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] #, "LOOK_UP", "LOOK_DOWN"] 35 | SUCCESS_DISTANCE: 0.2 36 | 37 | SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR'] #['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] 38 | # GOAL_SENSOR_UUID: objectgoal 39 | 40 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL', 'SOFT_SPL'] 41 | 42 | DISTANCE_TO_GOAL: 43 | DISTANCE_TO: VIEW_POINTS 44 | SUCCESS: 45 | SUCCESS_DISTANCE: 0.2 46 | 47 | DATASET: 48 | TYPE: ObjectNav-v1 49 | SPLIT: val_mini 50 | DATA_PATH: "/root/habitat-lab/data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz" 51 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 52 | -------------------------------------------------------------------------------- /src/cfg/objectnav_mp3d_train_val_mini.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 128 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] #, 'SEMANTIC_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | SEMANTIC_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | POSITION: [0, 0.88, 0] 20 | RGB_SENSOR: 21 | WIDTH: 32 22 | HEIGHT: 32 23 | HFOV: 79 24 | POSITION: [0, 0.88, 0] 25 | 
DEPTH_SENSOR: 26 | WIDTH: 32 27 | HEIGHT: 32 28 | HFOV: 79 29 | MIN_DEPTH: 0.5 30 | MAX_DEPTH: 5.0 31 | POSITION: [0, 0.88, 0] 32 | TASK: 33 | TYPE: ObjectNav-v1 34 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] #, "LOOK_UP", "LOOK_DOWN"] 35 | SUCCESS_DISTANCE: 0.2 36 | 37 | SENSORS: [] #['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] 38 | # GOAL_SENSOR_UUID: objectgoal 39 | 40 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS'] 41 | 42 | DISTANCE_TO_GOAL: 43 | DISTANCE_TO: VIEW_POINTS 44 | SUCCESS: 45 | SUCCESS_DISTANCE: 0.2 46 | 47 | DATASET: 48 | TYPE: ObjectNav-v1 49 | SPLIT: val_mini 50 | DATA_PATH: "/root/habitat-lab/data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz" 51 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 52 | -------------------------------------------------------------------------------- /src/cfg/objectnav_mp3d_val_vae.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 128 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['DEPTH_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | DEPTH_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | MIN_DEPTH: 0.5 20 | MAX_DEPTH: 5.0 21 | POSITION: [0, 0.88, 0] 22 | TASK: 23 | TYPE: ObjectNav-v1 24 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] 25 | SUCCESS_DISTANCE: 0.2 26 | 27 | SENSORS: [] 28 | GOAL_SENSOR_UUID: objectgoal 29 | 30 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS'] 31 | 32 | DISTANCE_TO_GOAL: 33 | DISTANCE_TO: VIEW_POINTS 34 | SUCCESS: 35 | SUCCESS_DISTANCE: 0.2 36 | 37 | DATASET: 38 | TYPE: ObjectNav-v1 39 | SPLIT: val_mini 40 | DATA_PATH: "/root/habitat-lab/data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz" 41 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 42 | -------------------------------------------------------------------------------- /src/cfg/objectnav_mp3d_val_vae_d.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 64 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['DEPTH_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | DEPTH_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | MIN_DEPTH: 0.5 20 | MAX_DEPTH: 5.0 21 | POSITION: [0, 0.88, 0] 22 | TASK: 23 | TYPE: ObjectNav-v1 24 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] 25 | SUCCESS_DISTANCE: 0.2 26 | 27 | SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] 28 | GOAL_SENSOR_UUID: objectgoal 29 | 30 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL', 'SOFT_SPL'] 31 | 32 | DISTANCE_TO_GOAL: 33 | DISTANCE_TO: VIEW_POINTS 34 | SUCCESS: 35 | SUCCESS_DISTANCE: 0.2 36 | 37 | DATASET: 38 | TYPE: ObjectNav-v1 39 | SPLIT: val_mini 40 | DATA_PATH: "/root/habitat-lab/data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz" 41 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 42 | -------------------------------------------------------------------------------- /src/cfg/objectnav_mp3d_val_vae_rgbd.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 64 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: 
['RGB_SENSOR', 'DEPTH_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | RGB_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | POSITION: [0, 0.88, 0] 20 | DEPTH_SENSOR: 21 | WIDTH: 32 22 | HEIGHT: 32 23 | HFOV: 79 24 | MIN_DEPTH: 0.5 25 | MAX_DEPTH: 5.0 26 | POSITION: [0, 0.88, 0] 27 | TASK: 28 | TYPE: ObjectNav-v1 29 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] 30 | SUCCESS_DISTANCE: 0.2 31 | 32 | SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] 33 | GOAL_SENSOR_UUID: objectgoal 34 | 35 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL', 'SOFT_SPL'] 36 | 37 | DISTANCE_TO_GOAL: 38 | DISTANCE_TO: VIEW_POINTS 39 | SUCCESS: 40 | SUCCESS_DISTANCE: 0.2 41 | 42 | DATASET: 43 | TYPE: ObjectNav-v1 44 | SPLIT: val_mini 45 | DATA_PATH: "/root/habitat-lab/data/datasets/objectnav/mp3d/v1/{split}/{split}.json.gz" 46 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 47 | -------------------------------------------------------------------------------- /src/cfg/pointnav_gibson_train_vae.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 128 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['DEPTH_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | DEPTH_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | MIN_DEPTH: 0.5 20 | MAX_DEPTH: 5.0 21 | POSITION: [0, 0.88, 0] 22 | TASK: 23 | TYPE: ObjectNav-v1 24 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] 25 | SUCCESS_DISTANCE: 0.2 26 | 27 | SENSORS: [] 28 | GOAL_SENSOR_UUID: pointgoal 29 | 30 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS'] 31 | 32 | DISTANCE_TO_GOAL: 33 | DISTANCE_TO: VIEW_POINTS 34 | SUCCESS: 35 | SUCCESS_DISTANCE: 0.2 36 | 37 | DATASET: 38 | TYPE: PointNav-v1 39 | SPLIT: train 40 | DATA_PATH: "/root/habitat-lab/data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz" 41 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 42 | -------------------------------------------------------------------------------- /src/cfg/pointnav_gibson_val_vae.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 128 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 30 6 | TILT_ANGLE: 30 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['DEPTH_SENSOR'] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | DEPTH_SENSOR: 16 | WIDTH: 32 17 | HEIGHT: 32 18 | HFOV: 79 19 | MIN_DEPTH: 0.5 20 | MAX_DEPTH: 5.0 21 | POSITION: [0, 0.88, 0] 22 | TASK: 23 | TYPE: ObjectNav-v1 24 | POSSIBLE_ACTIONS: ["MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT"] 25 | SUCCESS_DISTANCE: 0.2 26 | 27 | SENSORS: [] 28 | GOAL_SENSOR_UUID: pointgoal 29 | 30 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS'] 31 | 32 | DISTANCE_TO_GOAL: 33 | DISTANCE_TO: VIEW_POINTS 34 | SUCCESS: 35 | SUCCESS_DISTANCE: 0.2 36 | 37 | DATASET: 38 | TYPE: PointNav-v1 39 | SPLIT: val_mini 40 | DATA_PATH: "/root/habitat-lab/data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz" 41 | SCENES_DIR: "/root/habitat-lab/data/scene_datasets/" 42 | -------------------------------------------------------------------------------- /src/cfg/recall_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | from ray.tune import 
register_env 3 | from recall_env import RecallEnv 4 | from ray.rllib.agents.impala import ImpalaTrainer 5 | from models.ray_graph import RayObsGraph 6 | from models.ray_dnc import DNCMemory 7 | from ray.tune import grid_search 8 | 9 | from models.edge_selectors.bernoulli import BernoulliEdge 10 | from models.edge_selectors.temporal import TemporalBackedge 11 | 12 | 13 | register_env(RecallEnv.__name__, RecallEnv) 14 | 15 | base_model = { 16 | "custom_model": RayObsGraph, 17 | "custom_model_config": { 18 | "graph_size": 17, 19 | "gcn_output_size": 16, 20 | "gcn_hidden_size": 16, 21 | "gcn_num_layers": 4, 22 | "edge_selectors": [], 23 | }, 24 | "max_seq_len": 9, 25 | } 26 | 27 | temporal_model = { 28 | "custom_model": RayObsGraph, 29 | "custom_model_config": { 30 | "graph_size": 17, 31 | "gcn_output_size": 16, 32 | "gcn_hidden_size": 16, 33 | "gcn_num_layers": 4, 34 | "edge_selectors": [TemporalBackedge], 35 | }, 36 | "max_seq_len": 9, 37 | } 38 | 39 | bernoulli_model = { 40 | "custom_model": RayObsGraph, 41 | "custom_model_config": { 42 | "graph_size": 17, 43 | "gcn_output_size": 16, 44 | "gcn_hidden_size": 16, 45 | "gcn_num_layers": 4, 46 | "edge_selectors": [BernoulliEdge], 47 | }, 48 | "max_seq_len": 9, 49 | } 50 | 51 | rnn_model = {"use_lstm": True, "max_seq_len": 9, "lstm_cell_size": 16} 52 | 53 | dnc_model = { 54 | "custom_model": DNCMemory, 55 | "custom_model_config": { 56 | "hidden_size": 16, 57 | "num_layers": 1, 58 | "num_hidden_layers": 2, 59 | "read_heads": 4, 60 | "nr_cells": 9, 61 | "cell_size": 8, 62 | }, 63 | "max_seq_len": 9, 64 | } 65 | 66 | models = [rnn_model, dnc_model, base_model] # temporal_model, bernoulli_model] 67 | 68 | CFG = { 69 | # Our specific trainer type 70 | "ray_trainer": ImpalaTrainer, 71 | # Ray specific config sent to ray.tune or ray.rllib trainer 72 | "ray": { 73 | "env_config": {"dim": 4, "max_items": 4, "max_queries": 4}, 74 | # These are rllib/ray specific 75 | "framework": "torch", 76 | "model": grid_search(models), 77 | "num_workers": 2, 78 | # Total GPU usage: num_gpus (trainer proc) + num_gpus_per_worker (workers) 79 | "num_cpus_per_worker": 4, 80 | "num_envs_per_worker": 1, 81 | # this corresponds to the number of learner GPUs used, 82 | # not the total used for the environments/rollouts 83 | "num_gpus": 1, 84 | # Size of batches (in timesteps) placed in the learner queue 85 | "rollout_fragment_length": 16, 86 | # Total number of timesteps to train per batch 87 | "train_batch_size": 512, 88 | "lr": 0.0001, 89 | "env": RecallEnv.__name__, 90 | }, 91 | "tune": { 92 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 93 | "stop": {"info/num_steps_trained": 10e6}, 94 | }, 95 | # Env to be loaded when mode == human 96 | "human_env": RecallEnv, 97 | } 98 | 99 | if os.environ.get("DEBUG", False): 100 | print("-------DEBUG MODE---------") 101 | # CFG['ray']['model']['custom_model_config']['edge_selectors'] = [TemporalBackedge] 102 | CFG["ray"]["model"] = base_model 103 | # CFG['ray']['model']['custom_model_config']['export_gradients'] = True 104 | # CFG['ray']['model']['custom_model_config'] 105 | # CFG['ray']['num_envs_per_worker'] = 1 106 | # CFG["ray"]["num_workers"] = 0 107 | # CFG['ray']['train_batch_size'] = 64 108 | # CFG['ray']['rollout_fragment_length'] = 32 109 | -------------------------------------------------------------------------------- /src/cfg/repeat.py: -------------------------------------------------------------------------------- 1 | from ray.rllib.agents.impala import ImpalaTrainer 2 | from ray.rllib.agents.ppo 
import PPOTrainer 3 | from ray.rllib.examples.env.repeat_after_me_env import RepeatAfterMeEnv 4 | from ray.tune import register_env 5 | from ray.tune import grid_search 6 | from models.edge_selectors.temporal import TemporalBackedge 7 | from models.ray_graph import RayObsGraph 8 | import torch_geometric 9 | import torch 10 | from copy import deepcopy 11 | from typing import Dict, Any 12 | from models.ray_dnc import DNCMemory 13 | 14 | from cfg import base 15 | 16 | register_env(RepeatAfterMeEnv.__name__, RepeatAfterMeEnv) 17 | seq_len = 101 18 | hidden = 32 19 | gsize = seq_len + 1 20 | delay = 2 21 | 22 | # These are specific to our habitat-based environment 23 | env_cfg = { 24 | "repeat_delay": delay, 25 | } 26 | 27 | dgc = torch_geometric.nn.Sequential( 28 | "x, adj, weights, B, N", 29 | [ 30 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 31 | (torch.nn.Tanh()), 32 | ], 33 | ) 34 | dgc.name = "GraphConv_1h" 35 | base_model = { 36 | "custom_model": RayObsGraph, 37 | "custom_model_config": { 38 | "graph_size": gsize, 39 | "gnn_input_size": hidden, 40 | "gnn_output_size": hidden, 41 | "gnn": dgc, 42 | # "use_prev_action": True, 43 | }, 44 | "max_seq_len": seq_len, 45 | } 46 | temporal_model = deepcopy(base_model) 47 | temporal_model["custom_model_config"]["edge_selectors"] = TemporalBackedge(hops=[delay]) 48 | no_mem: Dict[str, Any] = { 49 | "fcnet_hiddens": [hidden], 50 | "fcnet_activation": "tanh", 51 | } 52 | rnn_model = { 53 | **no_mem, 54 | "use_lstm": True, 55 | "max_seq_len": seq_len, 56 | "lstm_cell_size": hidden, 57 | # "lstm_use_prev_action": True, 58 | } # type: ignore 59 | dnc = { 60 | "custom_model": DNCMemory, 61 | "custom_model_config": { 62 | "hidden_size": hidden, 63 | # "num_layers": 1, 64 | # "num_hidden_layers": 1, 65 | "read_heads": 2, 66 | "nr_cells": gsize, 67 | "cell_size": hidden, 68 | }, 69 | "max_seq_len": seq_len, 70 | } 71 | attn_model = { 72 | **no_mem, 73 | "use_attention": True, 74 | "attention_num_transformer_units": 1, 75 | "attention_dim": hidden, 76 | "attention_num_heads": 1, 77 | "attention_head_dim": hidden, 78 | "attention_position_wise_mlp_dim": hidden, 79 | "attention_memory_inference": seq_len, 80 | "attention_memory_training": seq_len, 81 | # "attention_use_n_prev_actions": 1, 82 | } 83 | 84 | models = [ 85 | temporal_model, 86 | rnn_model, 87 | attn_model, 88 | # dnc, 89 | ] 90 | 91 | CFG = { 92 | # Our specific trainer type 93 | "ray_trainer": ImpalaTrainer, 94 | # Ray specific config sent to ray.tune or ray.rllib trainer 95 | "ray": { 96 | # These are rllib/ray specific 97 | "env_config": env_cfg, 98 | "framework": "torch", 99 | "model": grid_search(models), 100 | "num_workers": 2, 101 | # "num_cpus_per_worker": 16, 102 | # "num_envs_per_worker": 8, 103 | # "num_gpus_per_worker": 0.1, 104 | "num_gpus": 1, 105 | "env": RepeatAfterMeEnv.__name__, 106 | "entropy_coeff": 0.001, 107 | "vf_loss_coeff": 1e-4, 108 | "gamma": 0.99, 109 | # Use these to prevent losing reward on the final step 110 | # due to overflow 111 | # "horizon": seq_len - 1, 112 | "batch_mode": "complete_episodes", 113 | "vtrace": False, 114 | "lr": 0.0005, 115 | }, 116 | "tune": { 117 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 118 | "stop": {"info/num_steps_trained": 1e6}, 119 | }, 120 | } 121 | -------------------------------------------------------------------------------- /src/cfg/repeat_100.py: -------------------------------------------------------------------------------- 1 | from ray.rllib.agents.impala import ImpalaTrainer 2 
| from ray.rllib.agents.ppo import PPOTrainer 3 | from ray.rllib.examples.env.repeat_after_me_env import RepeatAfterMeEnv 4 | from ray.tune import register_env 5 | from ray.tune import grid_search 6 | from models.edge_selectors.temporal import TemporalBackedge 7 | from models.ray_graph import RayObsGraph 8 | import torch_geometric 9 | import torch 10 | from copy import deepcopy 11 | from typing import Dict, Any 12 | from models.ray_dnc import DNCMemory 13 | 14 | from cfg import base 15 | 16 | register_env(RepeatAfterMeEnv.__name__, RepeatAfterMeEnv) 17 | seq_len = 101 18 | hidden = 32 19 | gsize = seq_len + 1 20 | delay = 20 21 | 22 | # These are specific to our habitat-based environment 23 | env_cfg = { 24 | "repeat_delay": delay, 25 | } 26 | 27 | dgc = torch_geometric.nn.Sequential( 28 | "x, adj, weights, B, N", 29 | [ 30 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 31 | (torch.nn.Tanh()), 32 | ], 33 | ) 34 | dgc.name = "GraphConv_1h" 35 | base_model = { 36 | "custom_model": RayObsGraph, 37 | "custom_model_config": { 38 | "graph_size": gsize, 39 | "gnn_input_size": hidden, 40 | "gnn_output_size": hidden, 41 | "gnn": dgc, 42 | # "use_prev_action": True, 43 | }, 44 | "max_seq_len": seq_len, 45 | } 46 | temporal_model = deepcopy(base_model) 47 | temporal_model["custom_model_config"]["edge_selectors"] = TemporalBackedge(hops=[delay]) 48 | no_mem: Dict[str, Any] = { 49 | "fcnet_hiddens": [hidden], 50 | "fcnet_activation": "tanh", 51 | } 52 | rnn_model = { 53 | **no_mem, 54 | "use_lstm": True, 55 | "max_seq_len": seq_len, 56 | "lstm_cell_size": hidden, 57 | # "lstm_use_prev_action": True, 58 | } # type: ignore 59 | dnc = { 60 | "custom_model": DNCMemory, 61 | "custom_model_config": { 62 | "hidden_size": hidden, 63 | # "num_layers": 1, 64 | # "num_hidden_layers": 1, 65 | "read_heads": 2, 66 | "nr_cells": gsize, 67 | "cell_size": hidden, 68 | }, 69 | "max_seq_len": seq_len, 70 | } 71 | attn_model = { 72 | **no_mem, 73 | "use_attention": True, 74 | "attention_num_transformer_units": 1, 75 | "attention_dim": hidden, 76 | "attention_num_heads": 1, 77 | "attention_head_dim": hidden, 78 | "attention_position_wise_mlp_dim": hidden, 79 | "attention_memory_inference": seq_len, 80 | "attention_memory_training": seq_len, 81 | # "attention_use_n_prev_actions": 1, 82 | } 83 | 84 | models = [ 85 | temporal_model, 86 | rnn_model, 87 | attn_model, 88 | # dnc, 89 | ] 90 | 91 | CFG = { 92 | # Our specific trainer type 93 | "ray_trainer": ImpalaTrainer, 94 | # Ray specific config sent to ray.tune or ray.rllib trainer 95 | "ray": { 96 | # These are rllib/ray specific 97 | "env_config": env_cfg, 98 | "framework": "torch", 99 | "model": grid_search(models), 100 | "num_workers": 2, 101 | # "num_cpus_per_worker": 16, 102 | # "num_envs_per_worker": 8, 103 | # "num_gpus_per_worker": 0.1, 104 | "num_gpus": 1, 105 | "env": RepeatAfterMeEnv.__name__, 106 | "entropy_coeff": 0.001, 107 | "vf_loss_coeff": 1e-4, 108 | "gamma": 0.99, 109 | # Use these to prevent losing reward on the final step 110 | # due to overflow 111 | # "horizon": seq_len - 1, 112 | "batch_mode": "complete_episodes", 113 | "vtrace": False, 114 | "lr": 0.0005, 115 | }, 116 | "tune": { 117 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 118 | "stop": {"info/num_steps_trained": 1e6}, 119 | }, 120 | } 121 | -------------------------------------------------------------------------------- /src/cfg/sparse_nav.py: -------------------------------------------------------------------------------- 1 | from cfg import base 2 
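# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository). In repeat.py and
# repeat_100.py above, the graph model is given TemporalBackedge(hops=[delay])
# so the node written at step t receives an edge from the node written
# `delay` steps earlier -- the observation the RepeatAfterMe task asks the
# agent to reproduce. A minimal standalone check of that behaviour (assuming
# src/ is on PYTHONPATH so models.* imports resolve):
def _temporal_backedge_sketch():
    import torch
    from models.edge_selectors.temporal import TemporalBackedge

    B, N, feat = 2, 8, 4
    nodes = torch.zeros(B, N, feat)
    adj = torch.zeros(B, N, N)
    weights = torch.ones(B, N, N)
    num_nodes = torch.tensor([5, 5])        # index of the node added this step
    selector = TemporalBackedge(hops=[2])   # hops mirrors the env repeat delay
    adj, weights = selector(nodes, adj, weights, num_nodes, B)
    assert torch.all(adj[:, 5, 3] == 1)     # step t aggregates from step t-2
    return adj
# ---------------------------------------------------------------------------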
| from custom_metrics import EvalMetrics 3 | from models.sparse_ray_graph import RaySparseObsGraph 4 | from ray.tune import grid_search 5 | import torch 6 | import torch_geometric 7 | from models.sparse_edge_selectors.temporal import TemporalEdge 8 | from models.sparse_edge_selectors.distance import SpatialEdge 9 | 10 | import os 11 | from copy import deepcopy 12 | 13 | # seq_len must be the same as episode length 14 | seq_len = 128 15 | hidden = 64 16 | act_dim = 3 17 | 18 | gc = torch_geometric.nn.Sequential( 19 | "x, edge_index, weights", 20 | [ 21 | (torch_geometric.nn.GraphConv(64, 64), "x, edge_index -> x"), 22 | torch.nn.Tanh(), 23 | (torch_geometric.nn.GraphConv(64, 64), "x, edge_index -> x"), 24 | torch.nn.Tanh(), 25 | ], 26 | ) 27 | 28 | base_graph = { 29 | "custom_model": RaySparseObsGraph, 30 | "custom_model_config": { 31 | "gnn_input_size": hidden, 32 | "gnn_output_size": hidden, 33 | "gnn": gc, 34 | "edge_selectors": None, 35 | "use_prev_action": True, 36 | }, 37 | "max_seq_len": seq_len, 38 | } 39 | 40 | temp = deepcopy(base_graph) 41 | temp["custom_model_config"]["edge_selectors"] = TemporalEdge(1) 42 | 43 | pose = deepcopy(base_graph) 44 | pose["custom_model_config"]["edge_selectors"] = SpatialEdge( 45 | max_distance=0.25, pose_slice=slice(2, 4) 46 | ) 47 | 48 | models = [ 49 | temp, 50 | # pose 51 | ] 52 | 53 | CFG = base.CFG 54 | CFG["ray"]["num_workers"] = 5 55 | CFG["ray"]["num_gpus"] = 1.0 56 | # For rollout workers 57 | CFG["ray"]["num_gpus_per_worker"] = 0.2 58 | CFG["ray"]["num_cpus_per_worker"] = 2 59 | CFG["ray"]["train_batch_size"] = 1024 60 | CFG["ray"]["rollout_fragment_length"] = seq_len 61 | CFG["tune"] = { 62 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 63 | "stop": {"info/num_steps_trained": 5e6}, 64 | } 65 | 66 | CFG["ray"]["model"] = grid_search(models) 67 | 68 | if os.environ.get("DEBUG", False): 69 | CFG["ray"]["model"] = temp 70 | CFG["ray"]["num_workers"] = 0 71 | CFG["ray"]["num_gpus"] = 0.3 72 | # CFG["ray"]["evaluation_num_workers"] = 1 73 | # CFG["ray"]["evaluation_interval"] = 1 74 | # CFG["ray"]["callbacks"] = EvalMetrics 75 | # CFG["ray"]["num_gpus"] = 0 76 | # CFG["ray"]["rollout_fragment_length"] = CFG["ray"]["train_batch_size"] 77 | # CFG["ray"]["model"]["custom_model_config"]["export_gradients"] = True 78 | CFG["ray"]["train_batch_size"] = 128 79 | CFG["ray"]["rollout_fragment_length"] = 128 80 | # CFG["tune"] = { 81 | # "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 82 | # "stop": {"info/num_steps_trained": 2048}, 83 | # } 84 | -------------------------------------------------------------------------------- /src/cfg/spatial_sweep.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from typing import Dict, Any 3 | from cfg import base 4 | from custom_metrics import EvalMetrics 5 | from models.ray_graph import RayObsGraph 6 | from models.ray_dnc import DNCMemory 7 | from models.edge_selectors.temporal import TemporalBackedge 8 | from models.edge_selectors.bernoulli import BernoulliEdge 9 | from models.edge_selectors.distance import CosineEdge, SpatialEdge 10 | from models.edge_selectors.dense import DenseEdge 11 | from ray.tune import grid_search 12 | import torch 13 | import torch_geometric 14 | 15 | import os 16 | 17 | seq_len = 128 18 | hidden = 32 19 | gsize = seq_len + 1 20 | act_dim = 3 21 | 22 | 23 | dgc = torch_geometric.nn.Sequential( 24 | "x, adj, weights, B, N", 25 | [ 26 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), 
"x, adj -> x"), 27 | (torch.nn.Tanh()), 28 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 29 | (torch.nn.Tanh()), 30 | ], 31 | ) 32 | dgc.name = "GraphConv_2h" 33 | 34 | dgc3 = torch_geometric.nn.Sequential( 35 | "x, adj, weights, B, N", 36 | [ 37 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 38 | (torch.nn.Tanh()), 39 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 40 | (torch.nn.Tanh()), 41 | (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"), 42 | (torch.nn.Tanh()), 43 | ], 44 | ) 45 | dgc3.name = "GraphConv_3h" 46 | 47 | gcn = torch_geometric.nn.Sequential( 48 | "x, adj, weights, B, N", 49 | [ 50 | (torch_geometric.nn.DenseGCNConv(hidden, hidden), "x, adj -> x"), 51 | (torch.nn.Tanh()), 52 | (torch_geometric.nn.DenseGCNConv(hidden, hidden), "x, adj -> x"), 53 | (torch.nn.Tanh()), 54 | ], 55 | ) 56 | gcn.name = "GCN_2h" 57 | 58 | gcn3 = torch_geometric.nn.Sequential( 59 | "x, adj, weights, B, N", 60 | [ 61 | (torch_geometric.nn.DenseGCNConv(hidden, hidden), "x, adj -> x"), 62 | (torch.nn.Tanh()), 63 | (torch_geometric.nn.DenseGCNConv(hidden, hidden), "x, adj -> x"), 64 | (torch.nn.Tanh()), 65 | (torch_geometric.nn.DenseGCNConv(hidden, hidden), "x, adj -> x"), 66 | (torch.nn.Tanh()), 67 | ], 68 | ) 69 | gcn3.name = "GCN_3h" 70 | 71 | sage = torch_geometric.nn.Sequential( 72 | "x, adj, weights, B, N", 73 | [ 74 | (torch_geometric.nn.DenseSAGEConv(hidden, hidden), "x, adj -> x"), 75 | (torch.nn.Tanh()), 76 | (torch_geometric.nn.DenseSAGEConv(hidden, hidden), "x, adj -> x"), 77 | (torch.nn.Tanh()), 78 | ], 79 | ) 80 | sage.name = "SAGE_2h" 81 | 82 | sage3 = torch_geometric.nn.Sequential( 83 | "x, adj, weights, B, N", 84 | [ 85 | (torch_geometric.nn.DenseSAGEConv(hidden, hidden), "x, adj -> x"), 86 | (torch.nn.Tanh()), 87 | (torch_geometric.nn.DenseSAGEConv(hidden, hidden), "x, adj -> x"), 88 | (torch.nn.Tanh()), 89 | (torch_geometric.nn.DenseSAGEConv(hidden, hidden), "x, adj -> x"), 90 | (torch.nn.Tanh()), 91 | ], 92 | ) 93 | sage3.name = "SAGE_3h" 94 | 95 | gin_nn = torch.nn.Sequential( 96 | torch.nn.Linear(hidden, hidden), 97 | torch.nn.Tanh(), 98 | torch.nn.Linear(hidden, hidden), 99 | torch.nn.Tanh(), 100 | ) 101 | gin = torch_geometric.nn.Sequential( 102 | "x, adj, weights, B, N", 103 | [ 104 | (torch_geometric.nn.DenseGINConv(gin_nn, train_eps=True), "x, adj -> x"), 105 | (torch_geometric.nn.DenseGINConv(gin_nn, train_eps=True), "x, adj -> x"), 106 | ], 107 | ) 108 | gin.name = "GIN_2h_tanh" 109 | 110 | gnns = [ 111 | gcn, 112 | gcn3, 113 | sage, 114 | sage3, 115 | gin, 116 | dgc, 117 | dgc3, 118 | ] 119 | 120 | for gnn in gnns: 121 | # Monkey patch reprs so tensorboard can parse 122 | # logfile names 123 | gnn.__class__.__repr__ = lambda self: self.name 124 | 125 | models = { 126 | "custom_model": RayObsGraph, 127 | "custom_model_config": { 128 | "graph_size": gsize, 129 | "gnn_input_size": hidden, 130 | "gnn_output_size": hidden, 131 | "gnn": grid_search(gnns), 132 | "use_prev_action": True, 133 | "edge_selectors": SpatialEdge(max_distance=0.25, pose_slice=slice(2, 4)), 134 | }, 135 | "max_seq_len": seq_len, 136 | } 137 | 138 | 139 | CFG = base.CFG 140 | CFG["ray"]["num_workers"] = 8 141 | CFG["ray"]["model"] = models # grid_search(models) 142 | 143 | # this corresponds to the number of learner GPUs used, 144 | # not the total used for the environments/rollouts 145 | # Since this is the bottleneck, we let it use an entire 1024 146 | CFG["ray"]["num_gpus"] = 1.0 147 | 148 | # For rollout workers 149 
| CFG["ray"]["num_gpus_per_worker"] = 0.2 150 | CFG["ray"]["num_cpus_per_worker"] = 2 151 | 152 | # At batch sizes of 1024 and 2048, GPU learn time is roughly the same per sample 153 | CFG["ray"]["train_batch_size"] = 1024 154 | CFG["ray"]["rollout_fragment_length"] = seq_len 155 | 156 | CFG["tune"] = { 157 | "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 158 | "stop": {"info/num_steps_trained": 10e6}, 159 | } 160 | 161 | 162 | if os.environ.get("DEBUG", False): 163 | # CFG["ray"]["model"] = bernoulli_reg 164 | CFG["ray"]["num_workers"] = 0 165 | CFG["ray"]["num_gpus"] = 0.3 166 | # CFG["ray"]["evaluation_num_workers"] = 1 167 | # CFG["ray"]["evaluation_interval"] = 1 168 | # CFG["ray"]["callbacks"] = EvalMetrics 169 | # CFG["ray"]["num_gpus"] = 0 170 | # CFG["ray"]["rollout_fragment_length"] = CFG["ray"]["train_batch_size"] 171 | # CFG["ray"]["model"]["custom_model_config"]["export_gradients"] = True 172 | CFG["ray"]["train_batch_size"] = 128 173 | CFG["ray"]["rollout_fragment_length"] = 64 174 | # CFG["tune"] = { 175 | # "goal_metric": {"metric": "episode_reward_mean", "mode": "max"}, 176 | # "stop": {"info/num_steps_trained": 2048}, 177 | # } 178 | -------------------------------------------------------------------------------- /src/cfg/train.py: -------------------------------------------------------------------------------- 1 | from cfg import base 2 | 3 | 4 | CFG = base.CFG 5 | CFG["ray"]["num_workers"] = 20 6 | -------------------------------------------------------------------------------- /src/cfg/train_ae.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ray.rllib.agents.impala import ImpalaTrainer 4 | from ray.tune import register_env 5 | 6 | from preprocessors.compass_fix import CompassFix 7 | from preprocessors.semantic.quantized_mesh import QuantizedSemanticMask 8 | from preprocessors.quantized_depth import QuantizedDepth 9 | from preprocessors.ghost_rgb import GhostRGB 10 | 11 | from models.ray_ae import RayAE 12 | 13 | from rewards.basic import BasicReward 14 | from rewards.path import PathReward 15 | 16 | from rayenv import NavEnv 17 | from custom_metrics import AEMetrics 18 | 19 | 20 | register_env(NavEnv.__name__, NavEnv) 21 | cfg_dir = os.path.abspath(os.path.dirname(__file__)) 22 | 23 | CFG = { 24 | # Our specific trainer type 25 | "ray_trainer": ImpalaTrainer, 26 | # Ray specific config sent to ray.tune or ray.rllib trainer 27 | "ray": { 28 | "env_config": { 29 | # Path to the habitat yaml config, that specifies sensor info, 30 | # which maps to use, etc. 
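# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository). The GNN zoo in
# spatial_sweep.py above builds every network with
# torch_geometric.nn.Sequential("x, adj, weights, B, N", ...): the generated
# forward therefore takes five arguments, although the dense conv layers only
# consume x ([B, N, hidden]) and adj ([B, N, N]). A minimal self-contained
# call, mirroring the repository's own layer stack:
def _dense_gnn_call_sketch():
    import torch
    import torch_geometric

    hidden, B, N = 32, 2, 6
    gnn = torch_geometric.nn.Sequential(
        "x, adj, weights, B, N",
        [
            (torch_geometric.nn.DenseGraphConv(hidden, hidden), "x, adj -> x"),
            (torch.nn.Tanh()),
        ],
    )
    x = torch.randn(B, N, hidden)
    adj = torch.eye(N).repeat(B, 1, 1)      # self-edges only
    weights = torch.ones(B, N, N)           # unused by DenseGraphConv
    out = gnn(x, adj, weights, B, N)
    assert out.shape == (B, N, hidden)
    return out
# Observation (not a fix): spatial_sweep.py constructs
# SpatialEdge(max_distance=0.25, pose_slice=slice(2, 4)), while the dense
# SpatialEdge defined later in models/edge_selectors/distance.py takes
# a_pose_slice/b_pose_slice keywords; the pose_slice keyword matches the
# sparse variant in models/sparse_edge_selectors/distance.py.
# ---------------------------------------------------------------------------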
31 | "hab_cfg_path": f"{cfg_dir}/objectnav_mp3d_train_vae.yaml", 32 | # Habitat preprocessors change the observation space in the simulator 33 | # These are loaded and run in-order 34 | "preprocessors": { 35 | "compass": CompassFix, 36 | "semantic": QuantizedSemanticMask, 37 | "depth": QuantizedDepth, 38 | "rgb_visualization": GhostRGB, 39 | }, 40 | "rewards": {}, 41 | }, 42 | # These are rllib/ray specific 43 | "framework": "torch", 44 | "model": { 45 | "framestack": False, 46 | "custom_model": RayAE, 47 | }, 48 | "num_workers": 4, 49 | "num_cpus_per_worker": 4, 50 | # Total GPU usage: num_gpus (trainer proc) + num_gpus_per_worker (workers) 51 | "num_gpus_per_worker": 0.25, 52 | # this corresponds to the number of learner GPUs used, 53 | # not the total used for the environments/rollouts 54 | "num_gpus": 0.3, 55 | # Size of batches (in timesteps) placed in the learner queue 56 | "rollout_fragment_length": 256, 57 | # Total number of timesteps to train per batch 58 | "train_batch_size": 1024, 59 | "lr": 0.0001, 60 | "env": NavEnv, 61 | "callbacks": AEMetrics, 62 | }, 63 | "tune": { 64 | "goal_metric": {"metric": "custom_metrics/ae_combined_loss", "mode": "min"}, 65 | }, 66 | } 67 | -------------------------------------------------------------------------------- /src/cfg/train_vae.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ray.rllib.agents.impala import ImpalaTrainer 4 | from ray.tune import register_env, grid_search 5 | 6 | from preprocessors.compass_fix import CompassFix 7 | from preprocessors.semantic.quantized_mesh import QuantizedSemanticMask 8 | from preprocessors.quantized_depth import QuantizedDepth 9 | from preprocessors.ghost_rgb import GhostRGB 10 | 11 | from models.ray_vae import RayVAE 12 | 13 | from rewards.basic import BasicReward 14 | from rewards.path import PathReward 15 | 16 | from rayenv import NavEnv 17 | from custom_metrics import VAEMetrics 18 | 19 | 20 | register_env(NavEnv.__name__, NavEnv) 21 | cfg_dir = os.path.abspath(os.path.dirname(__file__)) 22 | 23 | CFG = { 24 | # Our specific trainer type 25 | "ray_trainer": ImpalaTrainer, 26 | # Ray specific config sent to ray.tune or ray.rllib trainer 27 | "ray": { 28 | "env_config": { 29 | # Path to the habitat yaml config, that specifies sensor info, 30 | # which maps to use, etc. 
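# ---------------------------------------------------------------------------
# Illustrative note (not part of the repository): following the comment on
# num_gpus/num_gpus_per_worker, the total GPU request per trial for the
# train_ae.py settings above is
#   num_gpus + num_workers * num_gpus_per_worker = 0.3 + 4 * 0.25 = 1.3 GPUs,
# so a single trial fits on two physical GPUs but not on one.
# ---------------------------------------------------------------------------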
31 | "hab_cfg_path": f"{cfg_dir}/objectnav_mp3d_val_vae.yaml", 32 | # Habitat preprocessors change the observation space in the simulator 33 | # These are loaded and run in-order 34 | "preprocessors": { 35 | "compass": CompassFix, 36 | "semantic": QuantizedSemanticMask, 37 | "depth": QuantizedDepth, 38 | "rgb_visualization": GhostRGB, 39 | }, 40 | "rewards": {}, 41 | }, 42 | # These are rllib/ray specific 43 | "framework": "torch", 44 | "model": { 45 | "framestack": False, 46 | "custom_model": RayVAE, 47 | "custom_model_config": { 48 | "z_dim": grid_search([384, 512]), 49 | "depth_weight": 1.0, 50 | "semantic_weight": 1.0, 51 | "elbo_beta": 1.0, 52 | }, 53 | }, 54 | "num_workers": 5, 55 | "num_cpus_per_worker": 4, 56 | # Total GPU usage: num_gpus (trainer proc) + num_gpus_per_worker (workers) 57 | "num_gpus_per_worker": 0.3, 58 | # this corresponds to the number of learner GPUs used, 59 | # not the total used for the environments/rollouts 60 | "num_gpus": 0.3, 61 | # Size of batches (in timesteps) placed in the learner queue 62 | "rollout_fragment_length": 256, 63 | # Total number of timesteps to train per batch 64 | "train_batch_size": 512, 65 | "lr": 0.0001, 66 | "env": NavEnv, 67 | "callbacks": VAEMetrics, 68 | }, 69 | "tune": { 70 | "goal_metric": {"metric": "custom_metrics/ae_combined_loss", "mode": "min"}, 71 | "stop": {"info/num_steps_trained": 10e6}, 72 | }, 73 | } 74 | -------------------------------------------------------------------------------- /src/cfg/train_vae_d.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ray.rllib.agents.impala import ImpalaTrainer 4 | from ray.tune import register_env, grid_search 5 | 6 | from preprocessors.compass_fix import CompassFix 7 | from preprocessors.semantic.quantized_mesh import QuantizedSemanticMask 8 | from preprocessors.quantized_depth import QuantizedDepth 9 | from preprocessors.ghost_rgb import GhostRGB 10 | 11 | from models.ray_vae_d import DepthRayVAE 12 | 13 | from rewards.basic import BasicReward 14 | from rewards.path import PathReward 15 | 16 | from rayenv import NavEnv 17 | from custom_metrics import VAEMetrics, VAEEvalMetrics 18 | 19 | 20 | register_env(NavEnv.__name__, NavEnv) 21 | cfg_dir = os.path.abspath(os.path.dirname(__file__)) 22 | 23 | env_cfg = { 24 | # Path to the habitat yaml config, that specifies sensor info, 25 | # which maps to use, etc. 
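# ---------------------------------------------------------------------------
# Illustrative note (not part of the repository): grid_search values such as
# "z_dim": grid_search([384, 512]) in train_vae.py above make Ray Tune launch
# one trial per listed value; several grid_search entries in one config
# multiply out to their Cartesian product (e.g. 2 z_dims x 3 betas would be
# 6 trials). The commented-out grid_search calls in the later VAE configs
# follow the same pattern.
# ---------------------------------------------------------------------------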
26 | "hab_cfg_path": f"{cfg_dir}/objectnav_mp3d_train_vae.yaml", 27 | # Habitat preprocessors change the observation space in the simulator 28 | # These are loaded and run in-order 29 | "preprocessors": {}, 30 | # Multiple reward functions may be implemented at once, 31 | # they are summed together 32 | # "rewards": {"stop_goal": BasicReward, "goal_path": PathReward}, 33 | "rewards": {}, 34 | # We can't fit all the scenes into memory, so use fewer 35 | # "scene_proportion": 0.5 36 | } 37 | 38 | # Change the path for our validation set 39 | val_env_cfg = { 40 | **env_cfg, # type: ignore 41 | "hab_cfg_path": f"{cfg_dir}/objectnav_mp3d_train_val_mini.yaml", 42 | } 43 | 44 | CFG = { 45 | # Our specific trainer type 46 | "ray_trainer": ImpalaTrainer, 47 | # Ray specific config sent to ray.tune or ray.rllib trainer 48 | "ray": { 49 | "env_config": env_cfg, 50 | # These are rllib/ray specific 51 | "framework": "torch", 52 | "model": { 53 | "framestack": False, 54 | "custom_model": DepthRayVAE, 55 | "custom_model_config": { 56 | "z_dim": 64, # grid_search([64, 128]), 57 | "depth_weight": 1.0, 58 | "rgb_weight": 1.0, 59 | "elbo_beta": 0.01, 60 | }, 61 | }, 62 | "num_workers": 5, 63 | "num_cpus_per_worker": 2, 64 | # Total GPU usage: num_gpus (trainer proc) + num_gpus_per_worker (workers) 65 | "num_gpus_per_worker": 0.15, 66 | # this corresponds to the number of learner GPUs used, 67 | # not the total used for the environments/rollouts 68 | "num_gpus": 1.0, 69 | # Size of batches (in timesteps) placed in the learner queue 70 | "rollout_fragment_length": 256, 71 | # Total number of timesteps to train per batch 72 | "train_batch_size": 1024, 73 | "replay_proportion": 5.0, 74 | "replay_buffer_num_slots": 128, 75 | # "lr": 0.0005, 76 | "lr_schedule": [[0, 0.001], [250000, 0.0005], [500000, 0.0001]], 77 | "env": NavEnv, 78 | "callbacks": VAEMetrics, 79 | "evaluation_interval": 10, 80 | "evaluation_num_episodes": 10, 81 | "evaluation_config": { 82 | "env_config": val_env_cfg, 83 | "callbacks": VAEEvalMetrics, 84 | }, 85 | # "custom_eval_function": vae_eval, 86 | "evaluation_num_workers": 1, # Must be >0 to get OpenGL 87 | }, 88 | "tune": { 89 | "goal_metric": {"metric": "custom_metrics/ae_combined_loss", "mode": "min"}, 90 | "stop": {"info/num_steps_trained": 10e6}, 91 | }, 92 | } 93 | 94 | if os.environ.get("DEBUG", False): 95 | print("-------DEBUG---------") 96 | CFG["ray"]["num_workers"] = 1 97 | CFG["ray"]["model"]["custom_model_config"]["z_dim"] = 64 98 | CFG["ray"]["env_config"]["scene_proportion"] = 0.05 99 | print(CFG) 100 | -------------------------------------------------------------------------------- /src/cfg/train_vae_debug.py: -------------------------------------------------------------------------------- 1 | import os 2 | from cfg import train_vae 3 | 4 | 5 | cfg_dir = os.path.abspath(os.path.dirname(__file__)) 6 | 7 | CFG = train_vae.CFG 8 | CFG["ray"]["env_config"][ 9 | "hab_cfg_path" 10 | ] = f"{cfg_dir}/objectnav_mp3d_train_val_mini.yaml" 11 | CFG["ray"]["num_workers"] = 0 12 | CFG["tune"]["stop"] = {"training_iteration": 1} 13 | CFG["ray"]["train_batch_size"] = 8 14 | CFG["ray"]["rollout_fragment_length"] = 8 15 | -------------------------------------------------------------------------------- /src/cfg/train_vae_rgbd.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ray.rllib.agents.impala import ImpalaTrainer 4 | from ray.tune import register_env, grid_search 5 | 6 | from preprocessors.compass_fix import CompassFix 7 | 
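# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the repository). The lr_schedule in
# train_vae_d.py above is a list of [timestep, lr] anchor points; rllib
# interpolates linearly between anchors and holds the final value afterwards
# (an assumption based on rllib's piecewise-schedule behaviour). Roughly,
# with the hypothetical helper _lr_at:
def _lr_at(t, schedule=((0, 0.001), (250_000, 0.0005), (500_000, 0.0001))):
    """Approximate learning rate at env timestep t for a piecewise-linear schedule."""
    for (t0, v0), (t1, v1) in zip(schedule, schedule[1:]):
        if t <= t1:
            return v0 + (t - t0) / float(t1 - t0) * (v1 - v0)
    return schedule[-1][1]
# e.g. _lr_at(125_000) == 0.00075 and _lr_at(1_000_000) == 0.0001
# ---------------------------------------------------------------------------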
from preprocessors.semantic.quantized_mesh import QuantizedSemanticMask 8 | from preprocessors.quantized_depth import QuantizedDepth 9 | from preprocessors.ghost_rgb import GhostRGB 10 | 11 | from models.ray_vae_rgbd import RGBDRayVAE 12 | 13 | from rewards.basic import BasicReward 14 | from rewards.path import PathReward 15 | 16 | from rayenv import NavEnv 17 | from custom_metrics import VAEMetrics 18 | 19 | 20 | register_env(NavEnv.__name__, NavEnv) 21 | cfg_dir = os.path.abspath(os.path.dirname(__file__)) 22 | 23 | CFG = { 24 | # Our specific trainer type 25 | "ray_trainer": ImpalaTrainer, 26 | # Ray specific config sent to ray.tune or ray.rllib trainer 27 | "ray": { 28 | "env_config": { 29 | # Path to the habitat yaml config, that specifies sensor info, 30 | # which maps to use, etc. 31 | "hab_cfg_path": f"{cfg_dir}/objectnav_mp3d_val_vae_rgbd.yaml", 32 | # Habitat preprocessors change the observation space in the simulator 33 | # These are loaded and run in-order 34 | "preprocessors": { 35 | "compass": CompassFix, 36 | }, 37 | "rewards": {}, 38 | }, 39 | # These are rllib/ray specific 40 | "framework": "torch", 41 | "model": { 42 | "framestack": False, 43 | "custom_model": RGBDRayVAE, 44 | "custom_model_config": { 45 | "z_dim": 128, # grid_search([32, 64, 256]), 46 | "depth_weight": 2.0, 47 | "rgb_weight": 1.0, 48 | "elbo_beta": 0.5, 49 | }, 50 | }, 51 | "num_workers": 12, 52 | "num_cpus_per_worker": 2, 53 | # Total GPU usage: num_gpus (trainer proc) + num_gpus_per_worker (workers) 54 | "num_gpus_per_worker": 0.15, 55 | # this corresponds to the number of learner GPUs used, 56 | # not the total used for the environments/rollouts 57 | "num_gpus": 1, 58 | # Size of batches (in timesteps) placed in the learner queue 59 | "rollout_fragment_length": 256, 60 | # Total number of timesteps to train per batch 61 | "train_batch_size": 1024, 62 | "lr": 0.01, 63 | "env": NavEnv, 64 | "callbacks": VAEMetrics, 65 | }, 66 | "tune": { 67 | "goal_metric": {"metric": "custom_metrics/ae_combined_loss", "mode": "min"}, 68 | "stop": {"info/num_steps_trained": 10e6}, 69 | }, 70 | } 71 | 72 | if os.environ.get("DEBUG", False): 73 | print("-------DEBUG---------") 74 | CFG["ray"]["num_workers"] = 1 75 | CFG["ray"]["model"]["custom_model_config"]["z_dim"] = 64 76 | print(CFG) 77 | -------------------------------------------------------------------------------- /src/cfg/tune.py: -------------------------------------------------------------------------------- 1 | from ray import tune 2 | from ray.tune.suggest.hyperopt import HyperOptSearch 3 | 4 | from cfg import train 5 | 6 | CFG = train.CFG 7 | CFG["ray"]["lr"] = tune.loguniform(1e-2, 1e-4) 8 | CFG.update( 9 | { 10 | "tune": { 11 | "stop": {"training_iteration": 250}, 12 | "num_samples": 40, 13 | "search_alg": HyperOptSearch(metric="episode_reward_mean", mode="max"), 14 | } 15 | } 16 | ) 17 | -------------------------------------------------------------------------------- /src/clean_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for e in ~/ray_results/*; do 4 | size=$(du -s $e | cut -f1) 5 | if [[ $size -lt 2000 ]]; then 6 | echo "Delete $e, size $size" 7 | rm -r $e 8 | fi 9 | done 10 | -------------------------------------------------------------------------------- /src/graphenv.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | 4 | import numpy as np 5 | import networkx as nx 6 | import ray 7 | import cv2 8 | 
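# ---------------------------------------------------------------------------
# Illustrative notes (not part of the repository), on the cfg modules above:
# * ray.tune.loguniform expects (lower, upper); tune.py passes
#   tune.loguniform(1e-2, 1e-4), which appears to have the bounds reversed --
#   a log-uniform sweep over [1e-4, 1e-2] would normally be written
#   tune.loguniform(1e-4, 1e-2).
# * The DEBUG switches (os.environ.get("DEBUG", False)) trigger on any
#   non-empty value, because environment variables are strings: DEBUG=0 still
#   enables the debug overrides; only an unset or empty DEBUG disables them.
# ---------------------------------------------------------------------------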
from ray.rllib.agents import ppo 9 | from habitat.utils.visualizations import maps 10 | from habitat.utils.visualizations import utils 11 | 12 | from server.render import RENDER_ROOT, CLIENT_LOCK 13 | from rayenv import NavEnv 14 | 15 | 16 | class GraphNavEnv(NavEnv): 17 | graph = nx.Graph() 18 | node_map = None 19 | node_ctr = 0 20 | max_edge_dist = 0.3 21 | 22 | def add_node(self, obs, info): 23 | """Add a node using the current agent position 24 | and observation""" 25 | edges = [] 26 | for n_idx in self.graph.nodes(): 27 | cmp_data = self.graph.nodes[n_idx]["data"] 28 | if ( 29 | np.linalg.norm(cmp_data["pose"]["r"] - info["pose2d"]["r"]) 30 | < self.max_edge_dist 31 | ): 32 | edges += [(self.node_ctr, n_idx)] 33 | 34 | if info["top_down_map"]: 35 | map_pose = info["top_down_map"]["agent_map_coord"] 36 | map_angle = info["top_down_map"]["agent_angle"] 37 | else: 38 | map_pose = None 39 | map_angle = None 40 | 41 | node_data = { 42 | "pose": info["pose2d"], 43 | "obs": obs, 44 | # pose tfed for visualization} 45 | "map_pose": map_pose, 46 | "map_angle": map_angle, 47 | } 48 | self.graph.add_node(self.node_ctr, data=node_data) 49 | self.graph.add_edges_from(edges) 50 | self.node_ctr += 1 51 | 52 | def step(self, action): 53 | obs, reward, done, info = super().step(action) 54 | # Only visualize if someone is viewing via webbrowser 55 | if CLIENT_LOCK.exists(): 56 | if self.visualize_lvl >= 2 and info.get("top_down_map") is not None: 57 | self.add_node_to_map(info) 58 | self.emit_debug_graph(info) 59 | self.add_node(obs, info) 60 | return obs, reward, done, info 61 | 62 | def add_node_to_map(self, info): 63 | """Draw current position as a node to the node_map""" 64 | if self.node_map is None: 65 | self.node_map = maps.colorize_topdown_map( 66 | info["top_down_map"]["map"], 67 | ) 68 | 69 | if not info["top_down_map"]: 70 | return 71 | 72 | pose = info["top_down_map"]["agent_map_coord"] 73 | cv_pose = (pose[1], pose[0]) 74 | self.node_map = cv2.circle( 75 | self.node_map, cv_pose, radius=10, color=(0, 69, 255), thickness=3 76 | ) 77 | 78 | def emit_debug_graph(self, info): 79 | img = self.node_map.copy() 80 | 81 | if img.shape[0] > img.shape[1]: 82 | img = np.rot90(img, 1) 83 | 84 | # scale top down map to align with rgb view 85 | old_h, old_w, _ = img.shape 86 | top_down_height = self.hab_cfg.SIMULATOR.RGB_SENSOR.HEIGHT 87 | top_down_width = int(float(top_down_height) / old_h * old_w) 88 | # cv2 resize (dsize is width first) 89 | img = cv2.resize( 90 | img, 91 | (top_down_width, top_down_height), 92 | interpolation=cv2.INTER_CUBIC, 93 | ) 94 | 95 | tmp_impath = f"{self.render_dir}/graph.jpg.buf" 96 | impath = f"{self.render_dir}/graph.jpg" 97 | _, buf = cv2.imencode(".jpg", img) 98 | buf.tofile(tmp_impath) 99 | # We do this so we don't accidentally load a half-written img 100 | os.replace(tmp_impath, impath) 101 | 102 | def get_info(self, obs): 103 | info = super().get_info(obs) 104 | agent_state = self._env.sim.agents[0].state 105 | pose3d = {"r": agent_state.position, "q": agent_state.rotation} 106 | info["pose3d"] = pose3d 107 | # In habitat, y is up vector 108 | info["pose2d"] = { 109 | "r": np.array((agent_state.position[0], agent_state.position[2])), 110 | "theta": agent_state.rotation.copy(), 111 | } 112 | return info 113 | 114 | def reset(self): 115 | obs = super().reset() 116 | # Per-episode graph, place root node 117 | self.graph.clear() 118 | self.node_ctr = 0 119 | info = self.get_info(obs) 120 | self.node_map = None 121 | self.add_node(obs, info) 122 | return obs 123 | 
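# ---------------------------------------------------------------------------
# Illustrative sketch (not part of graphenv.py above). GraphNavEnv.add_node()
# links every new node to the stored nodes whose 2-D pose lies within
# max_edge_dist (0.3 m here). The same rule, stripped down to numpy +
# networkx so it can be run standalone (add_pose_node is a hypothetical
# helper, not a repository function):
import networkx as nx
import numpy as np

def add_pose_node(graph, node_ctr, pose_r, max_edge_dist=0.3):
    """Insert a node for pose_r and connect it to every stored node within range."""
    edges = [
        (node_ctr, n)
        for n in graph.nodes()
        if np.linalg.norm(graph.nodes[n]["r"] - pose_r) < max_edge_dist
    ]
    graph.add_node(node_ctr, r=pose_r)
    graph.add_edges_from(edges)
    return node_ctr + 1

# Three poses 0.2 m apart produce the chain 0-1-2 and no 0-2 shortcut:
_g, _ctr = nx.Graph(), 0
for _x in (0.0, 0.2, 0.4):
    _ctr = add_pose_node(_g, _ctr, np.array([_x, 0.0]))
assert set(_g.edges()) == {(0, 1), (1, 2)}
# ---------------------------------------------------------------------------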
-------------------------------------------------------------------------------- /src/models/.gnn.py.swo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smorad/graph-conv-memory-paper/4401b87703a631e85347efa665d822e31a41b1c6/src/models/.gnn.py.swo -------------------------------------------------------------------------------- /src/models/ae.py: -------------------------------------------------------------------------------- 1 | from torchvision.models import resnext50_32x4d 2 | from torchvision.models.resnet import ResNet 3 | from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 4 | from ray.rllib.models import ModelCatalog 5 | 6 | import torch 7 | from torch import nn 8 | 9 | 10 | class CNNAutoEncoder(torch.nn.Module): 11 | def __init__(self, h_dim=2048): 12 | assert h_dim in [1024, 2048] 13 | super().__init__() 14 | self.h_dim = h_dim 15 | # Takes 32x32x44 16 | if h_dim == 2048: 17 | self.encoder = nn.Sequential( 18 | nn.Conv2d(43 + 1, 128, 5, stride=3), # b, 64, 10 19 | nn.ReLU(), 20 | nn.Conv2d(128, 256, 4, stride=2), # b, 96, 4, 4 21 | nn.ReLU(), 22 | nn.Conv2d(256, 512, 3, stride=1), # b, 128, 2, 2 23 | nn.ReLU(), 24 | nn.Flatten(), 25 | ) 26 | self.decoder = nn.Sequential( 27 | nn.Unflatten(1, (self.h_dim // 4, 2, 2)), 28 | nn.ConvTranspose2d(512, 256, 3, stride=1), # b, 128, 2, 2 29 | nn.ReLU(), 30 | nn.ConvTranspose2d(256, 128, 4, stride=2), # b, 96, 4, 4 31 | nn.ReLU(), 32 | nn.ConvTranspose2d(128, 43 + 1, 5, stride=3), # b, 64, 10 33 | nn.Sigmoid(), 34 | ) 35 | elif h_dim == 1024: 36 | self.encoder = nn.Sequential( 37 | nn.Conv2d(43 + 1, 128, 5, stride=3), # b, 64, 10 38 | nn.ReLU(), 39 | nn.Conv2d(128, 196, 4, stride=2), # b, 96, 4, 4 40 | nn.ReLU(), 41 | nn.Conv2d(196, 256, 3, stride=1), # b, 128, 2, 2 42 | nn.ReLU(), 43 | nn.Flatten(), 44 | ) 45 | self.decoder = nn.Sequential( 46 | nn.Unflatten(1, (self.h_dim // 4, 2, 2)), 47 | nn.ConvTranspose2d(256, 196, 3, stride=1), # b, 128, 2, 2 48 | nn.ReLU(), 49 | nn.ConvTranspose2d(196, 128, 4, stride=2), # b, 96, 4, 4 50 | nn.ReLU(), 51 | nn.ConvTranspose2d(128, 43 + 1, 5, stride=3), # b, 64, 10 52 | nn.Sigmoid(), 53 | ) 54 | 55 | def encode(self, x): 56 | return self.encoder(x) 57 | 58 | def decode(self, z): 59 | return self.decoder(z) 60 | 61 | def forward(self, x): 62 | return self.decoder(self.encoder(x)) 63 | -------------------------------------------------------------------------------- /src/models/depth_vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class DepthVAE(nn.Module): 6 | # On CPU this takes 2.5ms to encode 7 | # on GPU, 886 microsecs 8 | def __init__(self, h_dim=512, z_dim=64): 9 | assert h_dim in [512] 10 | super().__init__() 11 | self.z_dim = z_dim 12 | self.encoder = nn.Sequential( 13 | nn.Conv2d(1, 32, 5, stride=3), # b, 64, 10 14 | nn.BatchNorm2d(32), 15 | nn.ReLU(), 16 | nn.Conv2d(32, 64, 4, stride=2), # b, 96, 4, 4 17 | nn.BatchNorm2d(64), 18 | nn.ReLU(), 19 | nn.Conv2d(64, 128, 3, stride=1), # b, 128, 2, 2 20 | nn.BatchNorm2d(128), 21 | nn.ReLU(), 22 | nn.Flatten(), 23 | ) 24 | 25 | self.fc1 = nn.Linear(h_dim, z_dim) 26 | self.fc2 = nn.Linear(h_dim, z_dim) 27 | 28 | self.fc3 = nn.Linear(z_dim, h_dim) 29 | 30 | self.decoder = nn.Sequential( 31 | nn.Unflatten(1, (h_dim // 4, 2, 2)), # TODO: This should be variable 32 | nn.ConvTranspose2d(128, 64, 3, stride=1), # b, 128, 2, 2 33 | nn.BatchNorm2d(64), 34 | nn.ReLU(), 35 | 
nn.ConvTranspose2d(64, 32, 4, stride=2), # b, 96, 4, 4 36 | nn.BatchNorm2d(32), 37 | nn.ReLU(), 38 | nn.ConvTranspose2d(32, 1, 5, stride=3), # b, 64, 10 39 | nn.Sigmoid(), 40 | ) 41 | 42 | def reparameterize(self, mu, logvar): 43 | std = logvar.mul(0.5).exp_().to(logvar.device) 44 | # return torch.normal(mu, std) 45 | esp = torch.randn(*mu.size()).to(mu.device) 46 | z = mu + std * esp 47 | return z 48 | 49 | def bottleneck(self, h): 50 | mu, logvar = self.fc1(h), self.fc2(h) 51 | z = self.reparameterize(mu, logvar) 52 | return z, mu, logvar 53 | 54 | def encode(self, x): 55 | h = self.encoder(x) 56 | z, mu, logvar = self.bottleneck(h) 57 | return z, mu, logvar 58 | 59 | def decode(self, z): 60 | z = self.fc3(z) 61 | z = self.decoder(z) 62 | return z 63 | 64 | def forward(self, x): 65 | z, mu, logvar = self.encode(x) 66 | z = self.decode(z) 67 | return z, mu, logvar 68 | 69 | def kld_loss(self, mu, logvar): 70 | return -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp()) 71 | 72 | def loss_fn(self, recon_x, x, mu, logvar): 73 | BCE = nn.functional.binary_cross_entropy(recon_x, x, size_average=False) 74 | # BCE = F.mse_loss(recon_x, x, size_average=False) 75 | 76 | # see Appendix B from VAE paper: 77 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 78 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 79 | KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp()) 80 | 81 | return BCE + KLD, BCE, KLD 82 | -------------------------------------------------------------------------------- /src/models/edge_selectors/dense.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class DenseEdge(torch.nn.Module): 5 | """Add temporal bidirectional back edge, but only if we have >1 nodes 6 | E.g., node_{t} <-> node_{t-1}""" 7 | 8 | def __init__(self): 9 | super().__init__() 10 | 11 | def forward(self, nodes, adj_mats, edge_weights, num_nodes, B): 12 | """Since this is called for each obs, it is sufficient to make row/col 13 | for obs 1""" 14 | 15 | # TODO: Batch this like DistanceEdge 16 | for b in range(B): 17 | i = num_nodes[b] 18 | adj_mats[b][i, :i] = 1 19 | adj_mats[b][:i, i] = 1 20 | # Self edge 21 | adj_mats[b][i, i] = 1 22 | 23 | return adj_mats, edge_weights 24 | -------------------------------------------------------------------------------- /src/models/edge_selectors/distance.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class Distance(torch.nn.Module): 5 | """Base class for edges based on the similarity between 6 | latent representations""" 7 | 8 | def __init__(self, max_distance, bidirectional=False): 9 | super().__init__() 10 | self.max_distance = max_distance 11 | self.bidirectional = bidirectional 12 | 13 | def forward(self, nodes, adj_mats, edge_weights, num_nodes, B): 14 | """Connect current obs to past obs based on distance of the node features""" 15 | B_idx = torch.arange(B) 16 | curr_nodes = nodes[B_idx, num_nodes[B_idx].squeeze()] 17 | dists = self.dist_fn(curr_nodes, nodes) 18 | batch_idxs, node_idxs = torch.where(dists < self.max_distance) 19 | # Remove entries beyond num_nodes 20 | # as well as num_nodes because we don't want 21 | # the self edge 22 | num_nodes_mask = node_idxs < num_nodes[batch_idxs] 23 | batch_idxs = batch_idxs.masked_select(num_nodes_mask) 24 | node_idxs = node_idxs.masked_select(num_nodes_mask) 25 | 26 | adj_mats[batch_idxs, num_nodes[batch_idxs].squeeze(), node_idxs] = 1 27 | if self.bidirectional: 28 | 
adj_mats[batch_idxs, node_idxs, num_nodes[batch_idxs].squeeze()] = 1 29 | 30 | return adj_mats, edge_weights 31 | 32 | 33 | class EuclideanEdge(Distance): 34 | """Mean per-dimension euclidean distance between obs vectors""" 35 | 36 | def __init__(self, max_distance): 37 | super().__init__(max_distance) 38 | 39 | def dist_fn(self, a, b): 40 | return torch.cdist(a, b).mean(dim=1) 41 | 42 | 43 | class CosineEdge(Distance): 44 | """Mean per-dimension cosine distance between obs vectors""" 45 | 46 | def __init__(self, max_distance): 47 | super().__init__(max_distance) 48 | self.cs = torch.nn.modules.distance.CosineSimilarity(dim=2) 49 | 50 | def dist_fn(self, a, b): 51 | a = torch.cat([a.unsqueeze(1)] * b.shape[1], dim=1) 52 | return self.cs(a, b) 53 | 54 | 55 | class SpatialEdge(Distance): 56 | """Euclidean distance representing the physical distance between two observations. 57 | Uses the slices a_pose_slice and b_pose_slice to extract the respective 58 | poses from the latent vectors""" 59 | 60 | def __init__(self, max_distance, a_pose_slice, b_pose_slice=None): 61 | super().__init__(max_distance) 62 | self.a_pose_slice = a_pose_slice 63 | if b_pose_slice: 64 | self.b_pose_slice = b_pose_slice 65 | else: 66 | self.b_pose_slice = a_pose_slice 67 | 68 | def dist_fn(self, a, b): 69 | a = torch.cat([a.unsqueeze(1)] * b.shape[1], dim=1) 70 | ra = a[:, :, self.a_pose_slice] 71 | rb = b[:, :, self.b_pose_slice] 72 | return torch.cdist(ra, rb).mean(dim=1) 73 | -------------------------------------------------------------------------------- /src/models/edge_selectors/self_edge.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class TemporalBackedge(torch.nn.Module): 5 | """Add temporal bidirectional back edge, but only if we have >1 nodes 6 | E.g., node_{t} <-> node_{t-1}""" 7 | 8 | def __init__(self, parent): 9 | self.parent = parent 10 | 11 | def forward(self, nodes, adj_mats, num_nodes, B): 12 | import pdb 13 | 14 | pdb.set_trace() 15 | -------------------------------------------------------------------------------- /src/models/edge_selectors/temporal.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import List 3 | 4 | # VISDOM TOP HALF PROVIDES BEST PERF 5 | 6 | # adj[0,3] = 1 7 | # neigh = matmul(adj, nodes) = nodes[0] 8 | # [i,j] => base[j] neighbor[i] 9 | # propagates from i to j 10 | 11 | # neighbor: torch.matmul(Adj[i, j], x) = x[i] = adj[i] 12 | # self: adj[j] 13 | # Vis: should be top half of visdom 14 | 15 | 16 | class TemporalBackedge(torch.nn.Module): 17 | """Add temporal directional back edge, e.g., node_{t} -> node_{t-1}""" 18 | 19 | def __init__(self, hops: List[int] = [1], direction="forward"): 20 | """ 21 | Hops: number of hops in the past to connect to 22 | E.g. [1] is t <- t-1, [2] is t <- t-2, 23 | [5,8] is t <- t-5 AND t <- t-8 24 | 25 | Direction: Directionality of graph edges. You likely want 26 | 'forward', which indicates information flowing from past 27 | to future. Backward is information from future to past, 28 | and both is both. 
29 | """ 30 | super().__init__() 31 | self.hops = hops 32 | assert direction in ["forward", "backward", "both"] 33 | self.direction = direction 34 | 35 | def forward(self, nodes, adj_mats, edge_weights, num_nodes, B): 36 | # TODO: Fix this to work with multiple hops 37 | # assert self.hops == [1], "num_hops >1 not working yet" 38 | for hop in self.hops: 39 | [valid_batches] = torch.where(num_nodes >= hop) 40 | if self.direction in ["forward", "both"]: 41 | adj_mats[ 42 | valid_batches, 43 | num_nodes[valid_batches], 44 | num_nodes[valid_batches] - hop, 45 | ] = 1 46 | if self.direction in ["backward", "both"]: 47 | adj_mats[ 48 | valid_batches, 49 | num_nodes[valid_batches] - hop, 50 | num_nodes[valid_batches], 51 | ] = 1 52 | 53 | return adj_mats, edge_weights 54 | -------------------------------------------------------------------------------- /src/models/gnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Sequential, Linear, ReLU 3 | from torch_geometric.nn import MessagePassing, EdgeConv 4 | 5 | 6 | class EgoGNNModel(MessagePassing): 7 | """ 8 | The model takes as input a euclidean graph. Each node contains a pose relative to 9 | the current pose and the observation from said pose. 10 | 11 | The output of the model is logits for discrete actions, and logit for creating 12 | a node from the current observation 13 | """ 14 | 15 | def __init__(self, F_in, F_out): 16 | super(EdgeConv, self).__init__(aggr="max") # "Max" aggregation. 17 | self.mlp = Sequential(Linear(2 * F_in, F_out), ReLU(), Linear(F_out, F_out)) 18 | 19 | def forward(self, x, edge_index): 20 | # x has shape [N, F_in] 21 | # edge_index has shape [2, E] 22 | return self.propagate(edge_index, x=x) # shape [N, F_out] 23 | 24 | def message(self, x_i, x_j): 25 | # x_i has shape [E, F_in] 26 | # x_j has shape [E, F_in] 27 | edge_features = torch.cat([x_i, x_j - x_i], dim=1) # shape [E, 2 * F_in] 28 | return self.mlp(edge_features) # shape [E, F_out] 29 | -------------------------------------------------------------------------------- /src/models/ray_ae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | from torch import nn 4 | from typing import Union, Dict, List 5 | import ray 6 | from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 7 | from ray.rllib.models.modelv2 import ModelV2, restore_original_dimensions 8 | from ray.rllib.utils.typing import ModelConfigDict, TensorType 9 | from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC 10 | 11 | 12 | from models.ae import CNNAutoEncoder 13 | 14 | 15 | class RayAE(TorchModelV2, CNNAutoEncoder): 16 | def __init__( 17 | self, 18 | obs_space: gym.spaces.Space, 19 | action_space: gym.spaces.Space, 20 | num_outputs: int, 21 | model_config: ModelConfigDict, 22 | name: str, 23 | ): 24 | super().__init__(obs_space, action_space, num_outputs, model_config, name) 25 | CNNAutoEncoder.__init__(self) 26 | self.sem_loss_fn = nn.CosineEmbeddingLoss(reduction="mean") 27 | self.depth_loss_fn = nn.MSELoss(reduction="mean") 28 | 29 | def variables( 30 | self, as_dict: bool = False 31 | ) -> Union[List[TensorType], Dict[str, TensorType]]: 32 | p = list(self.parameters()) 33 | if as_dict: 34 | return {k: p[i] for i, k in enumerate(self.state_dict().keys())} 35 | return p 36 | 37 | def trainable_variables( 38 | self, as_dict: bool = False 39 | ) -> Union[List[TensorType], Dict[str, TensorType]]: 40 | if as_dict: 41 | return { 42 | 
k: v for k, v in self.variables(as_dict=True).items() if v.requires_grad # type: ignore 43 | } 44 | return [v for v in self.variables() if v.requires_grad] # type: ignore 45 | 46 | def forward(self, input_dict, state, seq_lens): 47 | """Compute autoencoded image. Note the returned "logits" 48 | are random, as we want random actions""" 49 | self.curr_ae_input = self.to_img(input_dict) 50 | # TODO figure out why inheritance is such shit 51 | self.curr_ae_output = CNNAutoEncoder.forward(self, self.curr_ae_input) 52 | obs = input_dict["obs"] 53 | self._curr_value = torch.zeros((obs["gps"].shape[0],)).to(obs["gps"].device) 54 | out = torch.zeros((obs["gps"].shape[0], 5)).to(obs["gps"].device) 55 | # Return [batch, action_space] 56 | return out, state 57 | 58 | def value_function(self): 59 | return self._curr_value 60 | 61 | def to_img(self, input_dict): 62 | """Build obs into an image tensor for feeding to nn""" 63 | semantic = input_dict["obs"]["semantic"] 64 | depth = input_dict["obs"]["depth"] 65 | # [batch, channel, cols, rows] 66 | batch, channels, cols, rows = ( # type: ignore 67 | semantic.shape[0], 68 | semantic.shape[1] + 1, 69 | *semantic.shape[2:], # type : ignore 70 | ) 71 | 72 | semantic_tgt_channel = semantic.shape[1] 73 | depth_channel = semantic_tgt_channel # + 1 74 | 75 | x = torch.zeros( 76 | (batch, channels, cols, rows), dtype=torch.float32, device=semantic.device 77 | ) 78 | x[:, 0:semantic_tgt_channel] = semantic 79 | # x[:, semantic_channels:semantic_tgt_channel] = sem_tgt 80 | x[:, depth_channel] = torch.squeeze(depth) 81 | return x 82 | 83 | def custom_loss( 84 | self, policy_loss: List[torch.Tensor], loss_inputs 85 | ) -> List[torch.Tensor]: 86 | if not hasattr(self, "sem_tgt"): 87 | self.sem_tgt = torch.ones( 88 | self.curr_ae_input.shape[-2:], device=self.curr_ae_input.device 89 | ) 90 | 91 | self.sem_loss = self.sem_loss_fn( 92 | self.curr_ae_output[:, :-1, :, :], 93 | self.curr_ae_input[:, :-1, :, :], 94 | self.sem_tgt, 95 | ) 96 | 97 | self.depth_loss = self.depth_loss_fn( 98 | self.curr_ae_output[:, -1, :, :], 99 | self.curr_ae_input[:, -1, :, :], 100 | ) 101 | self.combined_loss = self.sem_loss + self.depth_loss 102 | 103 | return [self.combined_loss] 104 | 105 | def metrics(self): 106 | return { 107 | "depth_loss": self.depth_loss.detach().item(), 108 | "semantic_loss": self.sem_loss.detach().item(), 109 | "combined_loss": self.combined_loss.detach().item(), 110 | } 111 | -------------------------------------------------------------------------------- /src/models/ray_vae_d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | from torch import nn 4 | from typing import Union, Dict, List, Any 5 | import ray 6 | from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 7 | from ray.rllib.models.modelv2 import ModelV2, restore_original_dimensions 8 | from ray.rllib.utils.typing import ModelConfigDict, TensorType 9 | from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC 10 | 11 | 12 | from models.depth_vae import DepthVAE 13 | 14 | # from models.rgbd_resnet18_vae import VAE 15 | 16 | 17 | DEFAULTS = {"z_dim": 64, "depth_weight": 1, "rgb_weight": 1, "elbo_beta": 1} 18 | 19 | 20 | class DepthRayVAE(TorchModelV2, DepthVAE): 21 | def __init__( 22 | self, 23 | obs_space: gym.spaces.Space, 24 | action_space: gym.spaces.Space, 25 | num_outputs: int, 26 | model_config: ModelConfigDict, 27 | name: str, 28 | **custom_model_kwargs, 29 | ): 30 | super().__init__(obs_space, action_space, 
num_outputs, model_config, name) 31 | self.cfg = dict(DEFAULTS, **custom_model_kwargs) 32 | # VAE.__init__(self, z_dim=self.cfg["z_dim"], nc=4) 33 | DepthVAE.__init__(self, z_dim=self.cfg["z_dim"]) 34 | self.rgb_loss_fn = nn.MSELoss(reduction="mean") 35 | self.depth_loss_fn = nn.MSELoss(reduction="mean") 36 | self.act_space = gym.spaces.utils.flatdim(action_space) 37 | self.visdom_imgs: Dict[str, Any] = {} 38 | 39 | def forward(self, input_dict, state, seq_lens): 40 | """Compute autoencoded image. Note the returned "logits" 41 | are random, as we want random actions""" 42 | self.curr_ae_input = self.to_img(input_dict) 43 | # if torch.any(self.curr_ae_input > 0): 44 | # ray.util.pdb.set_trace() 45 | self.curr_ae_output, self.curr_mu, self.curr_logvar = DepthVAE.forward( 46 | self, self.curr_ae_input 47 | ) 48 | batch = input_dict["obs_flat"].shape[0] 49 | device = input_dict["obs_flat"].device 50 | self.device = device 51 | self._curr_value = torch.zeros((batch,), device=device) 52 | out = torch.zeros((batch, self.act_space), device=device) 53 | 54 | self.visdom_imgs.clear() 55 | di = self.curr_ae_input[:64].cpu().detach() 56 | do = self.curr_ae_output[:64].cpu().detach() 57 | self.visdom_imgs[f"depth_in-{self.cfg}"] = torch.cat( 58 | (di, di, di), dim=1 59 | ).numpy() 60 | self.visdom_imgs[f"depth_out-{self.cfg}"] = torch.cat( 61 | (do, do, do), dim=1 62 | ).numpy() 63 | # Return [batch, action_space] 64 | return out, state 65 | 66 | def value_function(self): 67 | return self._curr_value 68 | 69 | def to_img(self, input_dict): 70 | """Build obs into an image tensor for feeding to nn""" 71 | depth = input_dict["obs"]["depth"] 72 | # [batch, channel, cols, rows] 73 | # To B, dim, h, w 74 | depth = depth.permute(0, 3, 1, 2) 75 | """ 76 | x = torch.zeros( 77 | (batch, channels, cols, rows), dtype=torch.float32, device=rgb.device 78 | ) 79 | x[:, 0:semantic_tgt_channel] = semantic 80 | # x[:, semantic_channels:semantic_tgt_channel] = sem_tgt 81 | x[:, depth_channel] = torch.squeeze(depth) 82 | """ 83 | return depth 84 | 85 | def custom_loss( 86 | self, policy_loss: List[torch.Tensor], loss_inputs 87 | ) -> List[torch.Tensor]: 88 | 89 | self.sem_loss = torch.tensor([0], device=self.curr_ae_output.device) 90 | 91 | self.depth_loss = self.cfg["depth_weight"] * self.depth_loss_fn( 92 | self.curr_ae_output[:, -1, :, :], 93 | self.curr_ae_input[:, -1, :, :], 94 | ) 95 | self.recon_loss = self.sem_loss + self.depth_loss 96 | self.kld_loss = self.cfg["elbo_beta"] * DepthVAE.kld_loss( 97 | self, self.curr_mu, self.curr_logvar 98 | ) 99 | self.combined_loss = self.recon_loss + self.kld_loss 100 | 101 | return [self.combined_loss] 102 | 103 | def metrics(self): 104 | return { 105 | "depth_loss": self.depth_loss.detach().item(), 106 | "semantic_loss": self.sem_loss.detach().item(), 107 | "combined_loss": self.combined_loss.detach().item(), 108 | } 109 | -------------------------------------------------------------------------------- /src/models/ray_vae_rgbd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | from torch import nn 4 | from typing import Union, Dict, List, Any 5 | import ray 6 | from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 7 | from ray.rllib.models.modelv2 import ModelV2, restore_original_dimensions 8 | from ray.rllib.utils.typing import ModelConfigDict, TensorType 9 | from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC 10 | 11 | 12 | from models.rgbd_vae import RGBDVAE 13 | 14 | # from 
models.rgbd_resnet18_vae import VAE 15 | 16 | 17 | DEFAULTS = {"z_dim": 64, "depth_weight": 1, "rgb_weight": 1, "elbo_beta": 1} 18 | 19 | 20 | class RGBDRayVAE(TorchModelV2, RGBDVAE): 21 | def __init__( 22 | self, 23 | obs_space: gym.spaces.Space, 24 | action_space: gym.spaces.Space, 25 | num_outputs: int, 26 | model_config: ModelConfigDict, 27 | name: str, 28 | **custom_model_kwargs, 29 | ): 30 | super().__init__(obs_space, action_space, num_outputs, model_config, name) 31 | self.cfg = dict(DEFAULTS, **custom_model_kwargs) 32 | # VAE.__init__(self, z_dim=self.cfg["z_dim"], nc=4) 33 | RGBDVAE.__init__(self, z_dim=self.cfg["z_dim"]) 34 | self.rgb_loss_fn = nn.MSELoss(reduction="mean") 35 | self.depth_loss_fn = nn.MSELoss(reduction="mean") 36 | self.act_space = gym.spaces.utils.flatdim(action_space) 37 | self.visdom_imgs: Dict[str, Any] = {} 38 | 39 | def forward(self, input_dict, state, seq_lens): 40 | """Compute autoencoded image. Note the returned "logits" 41 | are random, as we want random actions""" 42 | self.curr_ae_input = self.to_img(input_dict) 43 | # if torch.any(self.curr_ae_input > 0): 44 | # ray.util.pdb.set_trace() 45 | self.curr_ae_output, self.curr_mu, self.curr_logvar = RGBDVAE.forward( 46 | self, self.curr_ae_input 47 | ) 48 | batch = input_dict["obs_flat"].shape[0] 49 | device = input_dict["obs_flat"].device 50 | self._curr_value = torch.zeros((batch,), device=device) 51 | out = torch.zeros((batch, self.act_space), device=device) 52 | 53 | self.visdom_imgs.clear() 54 | self.visdom_imgs[f"rgb_in-{self.cfg}"] = ( 55 | self.curr_ae_input[:64, 0:3].cpu().detach().numpy() 56 | ) 57 | self.visdom_imgs[f"rgb_out-{self.cfg}"] = ( 58 | self.curr_ae_output[:64, 0:3].cpu().detach().numpy() 59 | ) 60 | di = self.curr_ae_input[:64, 3].cpu().detach() 61 | do = self.curr_ae_output[:64, 3].cpu().detach() 62 | self.visdom_imgs[f"depth_in-{self.cfg}"] = torch.stack( 63 | (di, di, di), dim=1 64 | ).numpy() 65 | self.visdom_imgs[f"depth_out-{self.cfg}"] = torch.stack( 66 | (do, do, do), dim=1 67 | ).numpy() 68 | # Return [batch, action_space] 69 | return out, state 70 | 71 | def value_function(self): 72 | return self._curr_value 73 | 74 | def to_img(self, input_dict): 75 | """Build obs into an image tensor for feeding to nn""" 76 | rgb = input_dict["obs"]["rgb"] 77 | depth = input_dict["obs"]["depth"] 78 | # input shape B, h, w, dim 79 | rgb = rgb / 255.0 80 | assert rgb.min() >= 0.0 and rgb.max() <= 1.0 81 | x = torch.cat((rgb, depth), dim=-1) 82 | # To B, dim, h, w 83 | x = x.permute(0, 3, 1, 2) 84 | return x 85 | 86 | def custom_loss( 87 | self, policy_loss: List[torch.Tensor], loss_inputs 88 | ) -> List[torch.Tensor]: 89 | 90 | self.sem_loss = self.cfg["rgb_weight"] * self.rgb_loss_fn( 91 | self.curr_ae_output[:, :-1, :, :], 92 | self.curr_ae_input[:, :-1, :, :], 93 | # self.sem_tgt, 94 | ) 95 | 96 | self.depth_loss = self.cfg["depth_weight"] * self.depth_loss_fn( 97 | self.curr_ae_output[:, -1, :, :], 98 | self.curr_ae_input[:, -1, :, :], 99 | ) 100 | self.recon_loss = self.sem_loss + self.depth_loss 101 | self.kld_loss = self.cfg["elbo_beta"] * RGBDVAE.kld_loss( 102 | self, self.curr_mu, self.curr_logvar 103 | ) 104 | self.combined_loss = self.recon_loss + self.kld_loss 105 | 106 | return [self.combined_loss] 107 | 108 | def metrics(self): 109 | return { 110 | "depth_loss": self.depth_loss.detach().item(), 111 | "semantic_loss": self.sem_loss.detach().item(), 112 | "combined_loss": self.combined_loss.detach().item(), 113 | } 114 | 
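# ---------------------------------------------------------------------------
# Illustrative note (not part of the repository). DepthRayVAE and RGBDRayVAE
# above both train through custom_loss() on a weighted beta-VAE objective:
#
#   loss = rgb_weight   * MSE(rgb_hat,   rgb)          (RGBD model only)
#        + depth_weight * MSE(depth_hat, depth)
#        + elbo_beta    * KL,   KL = -0.5 * mean(1 + log sigma^2 - mu^2 - sigma^2)
#
# The returned policy "logits" and value are zeros, so the actions are
# effectively random and the usual rllib policy loss is discarded: these
# trainers only fit the encoder/decoder on observations collected that way.
# With the train_vae_rgbd.py weights (rgb_weight=1.0, depth_weight=2.0,
# elbo_beta=0.5) the depth term is doubled and the KL pressure halved
# relative to a plain ELBO.
# ---------------------------------------------------------------------------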
-------------------------------------------------------------------------------- /src/models/rgbd_vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class RGBDVAE(nn.Module): 6 | # On CPU this takes 2.5ms to encode 7 | # on GPU, 886 microsecs 8 | def __init__(self, h_dim=512, z_dim=64): 9 | assert h_dim in [512] 10 | super().__init__() 11 | self.z_dim = z_dim 12 | self.encoder = nn.Sequential( 13 | nn.Conv2d(4, 32, 5, stride=3), # b, 64, 10 14 | nn.BatchNorm2d(32), 15 | nn.ReLU(), 16 | nn.Conv2d(32, 64, 4, stride=2), # b, 96, 4, 4 17 | nn.BatchNorm2d(64), 18 | nn.ReLU(), 19 | nn.Conv2d(64, 128, 3, stride=1), # b, 128, 2, 2 20 | nn.BatchNorm2d(128), 21 | nn.ReLU(), 22 | nn.Flatten(), 23 | ) 24 | 25 | self.fc1 = nn.Linear(h_dim, z_dim) 26 | self.fc2 = nn.Linear(h_dim, z_dim) 27 | 28 | self.fc3 = nn.Linear(z_dim, h_dim) 29 | 30 | self.decoder = nn.Sequential( 31 | nn.Unflatten(1, (h_dim // 4, 2, 2)), # TODO: This should be variable 32 | nn.ConvTranspose2d(128, 64, 3, stride=1), # b, 128, 2, 2 33 | nn.BatchNorm2d(64), 34 | nn.ReLU(), 35 | nn.ConvTranspose2d(64, 32, 4, stride=2), # b, 96, 4, 4 36 | nn.BatchNorm2d(32), 37 | nn.ReLU(), 38 | nn.ConvTranspose2d(32, 4, 5, stride=3), # b, 64, 10 39 | nn.Sigmoid(), 40 | ) 41 | 42 | def reparameterize(self, mu, logvar): 43 | std = logvar.mul(0.5).exp_().to(logvar.device) 44 | # return torch.normal(mu, std) 45 | esp = torch.randn(*mu.size()).to(mu.device) 46 | z = mu + std * esp 47 | return z 48 | 49 | def bottleneck(self, h): 50 | mu, logvar = self.fc1(h), self.fc2(h) 51 | z = self.reparameterize(mu, logvar) 52 | return z, mu, logvar 53 | 54 | def encode(self, x): 55 | h = self.encoder(x) 56 | z, mu, logvar = self.bottleneck(h) 57 | return z, mu, logvar 58 | 59 | def decode(self, z): 60 | z = self.fc3(z) 61 | z = self.decoder(z) 62 | return z 63 | 64 | def forward(self, x): 65 | z, mu, logvar = self.encode(x) 66 | z = self.decode(z) 67 | return z, mu, logvar 68 | 69 | def kld_loss(self, mu, logvar): 70 | return -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp()) 71 | 72 | def loss_fn(self, recon_x, x, mu, logvar): 73 | BCE = nn.functional.binary_cross_entropy(recon_x, x, size_average=False) 74 | # BCE = F.mse_loss(recon_x, x, size_average=False) 75 | 76 | # see Appendix B from VAE paper: 77 | # Kingma and Welling. Auto-Encoding Variational Bayes. 
ICLR, 2014 78 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 79 | KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp()) 80 | 81 | return BCE + KLD, BCE, KLD 82 | -------------------------------------------------------------------------------- /src/models/sparse_edge_selectors/distance.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class Distance(torch.nn.Module): 5 | """Base class for edges based on the similarity between 6 | latent representations""" 7 | 8 | def __init__(self, max_distance): 9 | super().__init__() 10 | self.max_distance = max_distance 11 | 12 | def forward(self, nodes, edge_list, weights, B, T, t): 13 | """Connect current obs to past obs based on distance of the node features""" 14 | # Compare B * t to B * (t + T) nodes 15 | # results in B * (t * (t + T)) comparisons 16 | # with shape [B, t, t + T] 17 | # after comparisons, edges will be 18 | # [B, t, <= t + T] 19 | cur_node_idxs = torch.arange(T, T + t) 20 | # TODO: Cur_nodes assumes batches are aligned along temporal dim 21 | cur_nodes = nodes[:, cur_node_idxs] 22 | # [B, T] 23 | dist = self.dist_fn(cur_nodes, nodes) 24 | b_i_j_idxs = torch.nonzero(dist < self.max_distance).t() 25 | # a is the cur_nodes and b is all nodes 26 | a, b = b_i_j_idxs[1:] 27 | # Add the offset as cur_node_idxs do not start at 0 28 | a = a + T 29 | 30 | assert a.shape == b.shape 31 | pairs = torch.stack((a, b)).reshape(2, B, -1).permute(1, 0, 2) 32 | # Filter out edges pointing to the future, e.g. a < b 33 | # as well as self edges a == b 34 | edge_mask = torch.stack( 35 | (pairs[:, 0, :] > pairs[:, 1, :], pairs[:, 0, :] > pairs[:, 1, :]), # a,b 36 | dim=1, 37 | ) 38 | pairs = pairs[edge_mask].reshape(B, 2, -1) 39 | 40 | edge_list = torch.cat((edge_list, pairs), dim=-1) 41 | return edge_list, weights 42 | 43 | 44 | class EuclideanEdge(Distance): 45 | """Mean per-dimension euclidean distance between obs vectors""" 46 | 47 | def __init__(self, max_distance): 48 | super().__init__(max_distance) 49 | 50 | def dist_fn(self, a, b): 51 | return torch.cdist(a, b) 52 | 53 | 54 | class CosineEdge(Distance): 55 | """Mean per-dimension cosine distance between obs vectors""" 56 | 57 | def __init__(self, max_distance): 58 | super().__init__(max_distance) 59 | self.cs = torch.nn.modules.distance.CosineSimilarity(dim=2) 60 | 61 | def dist_fn(self, a, b): 62 | a = torch.cat([a.unsqueeze(1)] * b.shape[1], dim=1) 63 | return self.cs(a, b) 64 | 65 | 66 | class SpatialEdge(Distance): 67 | """Euclidean distance representing the physical distance between two observations""" 68 | 69 | def __init__(self, max_distance, pose_slice): 70 | super().__init__(max_distance) 71 | self.pose_slice = pose_slice 72 | 73 | def dist_fn(self, a, b): 74 | a = torch.cat([a.unsqueeze(1)] * b.shape[1], dim=1) 75 | ra = a[:, :, self.pose_slice] 76 | rb = b[:, :, self.pose_slice] 77 | return torch.cdist(ra, rb).mean(dim=1) 78 | -------------------------------------------------------------------------------- /src/models/sparse_edge_selectors/temporal.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class TemporalEdge(torch.nn.Module): 5 | """Add temporal bidirectional back edge, but only if we have >1 nodes 6 | E.g., node_{t} <-> node_{t-1}""" 7 | 8 | def __init__(self, num_hops=1, bidirectional=False): 9 | super().__init__() 10 | self.num_hops = num_hops 11 | self.bidirectional = bidirectional 12 | assert num_hops == 1, "Not yet implemented" 13 | 14 
| def forward(self, nodes, edge_list, weights, B, T, t): 15 | # Assumes equal sized full episodes 16 | # Rather than fill all previous edges (T) 17 | # only fill out the ones for our newly added nodes (T to T + t) 18 | a = torch.arange(T, T + t, device=nodes.device) 19 | b = torch.arange(T + 1, T + t + 1, device=nodes.device) 20 | 21 | new_edges = torch.stack((b, a)).repeat(B, 1, 1) 22 | if self.bidirectional: 23 | out_edge = torch.stack((a, b)).repeat(B, 1, 1) 24 | new_edges = torch.cat((new_edges, out_edge), dim=-1) 25 | 26 | # TODO: We have edge 0,1 at t0 with single node 27 | all_edges = torch.cat((edge_list, new_edges), dim=-1) 28 | return all_edges, weights 29 | -------------------------------------------------------------------------------- /src/models/sparse_gcm.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Any 2 | import torch 3 | import torch_geometric 4 | from torch_geometric_temporal.signal.dynamic_graph_static_signal import ( 5 | DynamicGraphStaticSignal, 6 | ) 7 | 8 | 9 | class SparseGCM(torch.nn.Module): 10 | """Graph Associative Memory""" 11 | 12 | def __init__( 13 | self, 14 | gnn: torch.nn.Module, 15 | preprocessor: torch.nn.Sequential = None, 16 | edge_selectors: torch_geometric.nn.Sequential = None, 17 | graph_size: int = 128, 18 | ): 19 | super().__init__() 20 | 21 | self.gnn = gnn 22 | self.graph_size = graph_size 23 | self.preprocessor = preprocessor 24 | self.edge_selectors = edge_selectors 25 | 26 | def build_batch( 27 | self, 28 | nodes: torch.Tensor, 29 | edge_list: torch.Tensor, 30 | weights: torch.Tensor, 31 | B: int, 32 | T: int, 33 | t: int, 34 | max_hops: int, 35 | ) -> torch_geometric.data.Batch: 36 | 37 | # We add all nodes to all timesteps 38 | # but filter using the edges 39 | data_list = [] 40 | 41 | for b in range(B): 42 | for tau in range(t): 43 | new_node_idx = T + tau 44 | graph_x = nodes[b].narrow(-2, 0, new_node_idx + 1) 45 | 46 | # TODO: Use k_hop_subgraph to improve performance 47 | 48 | # Prune edges if either edge a or b is outside 49 | # of the current nodes 50 | edge_mask = torch.sum(edge_list[b] <= new_node_idx, dim=0) == 2 51 | edge_mask = torch.stack((edge_mask, edge_mask)) 52 | graph_edge = edge_list[b].masked_select(edge_mask).reshape(2, -1) 53 | if weights is not None: 54 | graph_weight = weights[b, torch.arange(graph_edge.shape[-1])] 55 | d = torch_geometric.data.Data( 56 | x=graph_x, 57 | edge_index=graph_edge, 58 | edge_attr=graph_weight, 59 | B=b, 60 | new_idx=new_node_idx, 61 | t=tau, 62 | T=T, 63 | ) 64 | else: 65 | d = torch_geometric.data.Data( 66 | x=graph_x, 67 | edge_index=graph_edge, 68 | B=b, 69 | new_idx=new_node_idx, 70 | t=tau, 71 | T=T, 72 | ) 73 | 74 | data_list.append(d) 75 | batch = torch_geometric.data.Batch.from_data_list(data_list) 76 | return batch 77 | 78 | def forward( 79 | self, 80 | x: torch.Tensor, 81 | nodes: torch.Tensor, 82 | edge_list: torch.Tensor, 83 | weights: torch.Tensor, 84 | max_hops: int = -1, 85 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: 86 | """Add memores x to the graph, and return queries at x. 
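        A rough usage sketch, starting from an empty graph (the GNN and the
        sizes are placeholders; see test_sparse_gam.py for concrete values):

            gcm = SparseGCM(gnn)
            nodes = torch.zeros(B, 0, feat)
            edge_list = torch.zeros(B, 2, 0, dtype=torch.long)
            mx, nodes, edge_list, weights = gcm(x, nodes, edge_list, None)

        Shape legend: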
87 | B = batch size 88 | T = temporal input size 89 | t = temporal output size 90 | k = number of input edges 91 | j = number of output edges 92 | Inputs: 93 | x: [B,t,feat] 94 | hidden: ( 95 | nodes: [B,T,feats] 96 | edge_list: [B,2,k] 97 | weights: [B,k,1] 98 | ) 99 | Outputs: 100 | m(x): [B,t,feat] 101 | hidden: ( 102 | nodes: [B,T+t,feats] 103 | edge_list: [B,2,j] 104 | weights: [B,j,1] 105 | ) 106 | """ 107 | B = nodes.shape[0] 108 | T = nodes.shape[1] 109 | t = x.shape[1] 110 | 111 | nodes = torch.cat((nodes, x), dim=-2) 112 | 113 | if self.edge_selectors is not None: 114 | edge_list, weights = self.edge_selectors(nodes, edge_list, weights, B, T, t) 115 | # Batching will collapse edges into a single [2,j], nodes into 116 | # [B * (T + t), feats], etc 117 | # So make sure we make all the changes we need to 118 | # nodes/edges/weights before batching and gnn forward 119 | batch = self.build_batch( 120 | nodes, edge_list, weights, B, T, t, max_hops 121 | ).coalesce() 122 | # Preprocessor can be used to change obs_size to hidden_size 123 | # if needed 124 | if self.preprocessor: 125 | batch.x = self.preprocessor(batch.x) 126 | 127 | if weights is None: 128 | out = self.gnn(batch.x, batch.edge_index) 129 | else: 130 | out = self.gnn(batch.x, batch.edge_index, batch.edge_attr) 131 | 132 | # Shape [B*t,feat] -> [B,t,feat] 133 | # mx = batch.x[batch.ptr[1:] - 1].reshape(B, t, batch.x.shape[-1]) 134 | mx = out[batch.ptr[1:] - 1].reshape(B, t, out.shape[-1]) 135 | # Extract nodes 136 | # nodes = torch_geometric.utils.to_dense_batch(batch.x, batch.batch)[0] 137 | 138 | return mx, nodes, edge_list, weights 139 | -------------------------------------------------------------------------------- /src/models/test_ray_graph.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | import ray_graph 4 | 5 | 6 | class TestAdj(unittest.TestCase): 7 | g = ray_graph.RayObsGraph 8 | 9 | def setUp(self): 10 | self.adj = torch.zeros(2, 3, 3, dtype=torch.float32) 11 | self.nodes = torch.zeros(2, 3, 4, dtype=torch.float32) 12 | self.num_nodes = torch.tensor([[1, 2]]).long().T 13 | 14 | def test_add_self_edge(self): 15 | self.g.add_self_edge(None, self.adj, self.num_nodes), 16 | sol = torch.tensor( 17 | [[[0, 0, 0], [0, 1, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 1]]] 18 | ).float() 19 | 20 | if torch.any(self.adj != sol): 21 | self.fail(f"\nactual:\n {self.adj}\nexpected:\n {sol}") 22 | 23 | def test_add_backedge(self): 24 | self.g.add_backedge(None, self.adj, self.num_nodes), 25 | sol = torch.tensor( 26 | [[[0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 1], [0, 1, 0]]], 27 | ) 28 | 29 | if torch.any(self.adj != sol): 30 | self.fail(f"\nactual:\n {self.adj}\nexpected:\n {sol}") 31 | 32 | def test_densify_graph(self): 33 | self.g.densify_graph(None, self.adj, self.num_nodes), 34 | sol = torch.tensor( 35 | [[[1, 1, 0], [1, 1, 0], [0, 0, 0]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], 36 | ).float() 37 | 38 | if torch.any(self.adj != sol): 39 | self.fail(f"\nactual:\n {self.adj}\nexpected:\n {sol}") 40 | 41 | """ 42 | def test_positional_encoding(self): 43 | self.g.add_time_positional_encoding(None, self.nodes, self.num_nodes) 44 | 45 | embed = torch.range(0, self.nodes.shape[-1] - 1) 46 | sol = torch.sin(self.nodes / (10000 ** (2 * embed / embed.shape[0]))) 47 | 48 | if torch.any(self.nodes != sol): 49 | self.fail(f"\nactual:\n {self.nodes}\nexpected:\n {sol}") 50 | """ 51 | 52 | def test_learn_edges(self): 53 | class Fake: 54 | i = 0 55 | 56 
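# Stub scorer: its two logits flip between (0, 1e9) and (1e9, 0) on
# successive calls, so add_learned_edges should toggle edges on and off,
# producing the alternating pattern expected in `sol` below.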
| def edge_network(self, a): 57 | rv = torch.tensor( 58 | (self.i % 2 * 1e9, int(self.i % 2 == 0) * 1e9), dtype=torch.float32 59 | ) 60 | self.i += 1 61 | return rv 62 | 63 | sol = torch.tensor( 64 | [ 65 | [ 66 | [0, 1, 0], # Only top 2x2 chunk is used 67 | [0, 1, 0], 68 | [0, 0, 0], 69 | ], 70 | [[0, 1, 0], [1, 0, 1], [0, 1, 0]], 71 | ], 72 | dtype=torch.float, 73 | ) 74 | self.g.add_learned_edges(Fake(), self.nodes, self.adj, self.num_nodes) 75 | 76 | if torch.any(self.adj != sol): 77 | self.fail(f"\nactual:\n {self.adj}\nexpected:\n {sol}") 78 | 79 | def test_index_select(self): 80 | nodes0 = torch.arange(24).reshape(2, 3, 4) 81 | nodes1 = torch.arange(24).reshape(2, 3, 4) 82 | 83 | flat = torch.ones(2, 4, dtype=torch.long) 84 | outs = [] 85 | 86 | for batch in range(self.nodes.shape[0]): 87 | outs.append(nodes0[batch, self.num_nodes[batch]]) 88 | nodes0[batch, self.num_nodes[batch]] = flat[batch] 89 | 90 | # It's critical both these vectors are 1D 91 | idx_0 = torch.arange(self.num_nodes.shape[0]) 92 | idx_1 = self.num_nodes.squeeze() 93 | nodes1[idx_0, idx_1] = flat[idx_0] 94 | 95 | if torch.any(nodes0 != nodes1): 96 | self.fail(f"\nactual:\n {nodes1}\nexpected:\n {nodes0}") 97 | 98 | 99 | if __name__ == "__main__": 100 | unittest.main() 101 | -------------------------------------------------------------------------------- /src/models/test_sparse_gam.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | from sparse_gcm import SparseGCM 4 | import torch_geometric 5 | from sparse_edge_selectors.temporal import TemporalEdge 6 | from sparse_edge_selectors.distance import EuclideanEdge 7 | 8 | 9 | class Ident(torch.nn.Module): 10 | def forward(self, x, edge): 11 | return x 12 | 13 | 14 | class Sum(torch_geometric.nn.MessagePassing): 15 | def forward(self, x, edge): 16 | return self.propagate(edge, x=x) 17 | 18 | def message(self, x_i, x_j): 19 | return x_i + x_j.sum(dim=0) 20 | 21 | 22 | class TestSparseGCM(unittest.TestCase): 23 | def setUp(self): 24 | B = 3 25 | T = 4 26 | feat = 5 27 | t = 2 28 | 29 | self.nodes = torch.arange(1, B * T * feat + 1).reshape(B, T, feat) 30 | # Self edges 31 | # a = torch.arange(T) 32 | # edges = torch.meshgrid(a, a) 33 | a = torch.arange(T, dtype=torch.long) 34 | edges = torch.stack((a, a)).unsqueeze(0) 35 | # B, 2, T 36 | self.edges = edges.repeat(B, 1, 1) 37 | self.xs = torch.zeros(B, t, feat) 38 | 39 | self.gnn = torch_geometric.nn.Sequential( 40 | "x, edge_index", [(Ident(), "x, edge_index -> x")] 41 | ) 42 | 43 | self.gcm = SparseGCM(self.gnn) 44 | 45 | def test_simple(self): 46 | out, nodes, edge_list, weights = self.gcm(self.xs, self.nodes, self.edges, None) 47 | desired = torch.zeros(3, 2, 5) 48 | if torch.any(out != desired): 49 | self.fail(f"{out} != {desired}") 50 | 51 | def test_first_run(self): 52 | self.nodes = torch.tensor([]).reshape(3, 0, 5) 53 | self.edges = torch.tensor([], dtype=torch.long).reshape(3, 2, 0) 54 | out, nodes, edge_list, weights = self.gcm(self.xs, self.nodes, self.edges, None) 55 | 56 | desired_shape = (3, 2, 5) 57 | if nodes.shape != desired_shape: 58 | self.fail(f"{nodes.shape} != {desired_shape}") 59 | 60 | 61 | class TestBatchBuilder(unittest.TestCase): 62 | def setUp(self): 63 | B = 3 64 | T = 4 65 | feat = 5 66 | t = 2 67 | 68 | self.nodes = torch.arange(1, B * T * feat + 1).reshape(B, T, feat) 69 | # Self edges 70 | # a = torch.arange(T) 71 | # edges = torch.meshgrid(a, a) 72 | a = torch.arange(T, dtype=torch.long) 73 | edges = torch.stack((a, 
a)).unsqueeze(0) 74 | # B, 2, T 75 | self.edges = edges.repeat(B, 1, 1) 76 | self.xs = torch.zeros(B, t, feat) 77 | 78 | self.gnn = torch_geometric.nn.Sequential( 79 | "x, edge_index", [(Ident(), "x, edge_index -> x")] 80 | ) 81 | 82 | self.gcm = SparseGCM(self.gnn) 83 | 84 | def test_impossible_edges(self): 85 | self.edges[0, :, 0] = torch.tensor([100, 99], dtype=torch.long) 86 | # self.gnn.build_batch(self.nodes, self.edge_list, None, 87 | out, nodes, edge_list, weights = self.gcm( 88 | self.xs[:, 0, :].unsqueeze(1), self.nodes, self.edges, None 89 | ) 90 | # import pdb; pdb.set_trace() 91 | 92 | def test_future_edges(self): 93 | return 94 | self.edges[0, :, 0] = torch.tensor([5, 4], dtype=torch.long) 95 | out, nodes, edge_list, weights = self.gcm( 96 | self.xs[:, 0, :].unsqueeze(1), self.nodes, self.edges, None 97 | ) 98 | 99 | 100 | class TestTemporalEdge(unittest.TestCase): 101 | def setUp(self): 102 | B = 3 103 | T = 4 104 | feat = 5 105 | t = 2 106 | 107 | self.nodes = torch.arange(1, B * T * feat + 1).reshape(B, T, feat) 108 | # Self edges 109 | self.edges = torch.tensor([], dtype=torch.long).reshape(3, 2, 0) 110 | # B, 2, T 111 | self.xs = torch.zeros(B, t, feat) 112 | 113 | self.gnn = torch_geometric.nn.Sequential( 114 | "x, edge_index", [(Sum(), "x, edge_index -> x")] 115 | ) 116 | 117 | self.edge_selector = torch_geometric.nn.Sequential( 118 | "nodes, edge_list, weights, B, T, t", 119 | [ 120 | ( 121 | TemporalEdge(1), 122 | "nodes, edge_list, weights, B, T, t -> edge_list, weights", 123 | ) 124 | ], 125 | ) 126 | 127 | self.gcm = SparseGCM(self.gnn, edge_selectors=self.edge_selector) 128 | 129 | def test_simple(self): 130 | out, nodes, edge_list, weights = self.gcm(self.xs, self.nodes, self.edges, None) 131 | 132 | desired = torch.tensor( 133 | [ 134 | [5, 6], 135 | [4, 5], 136 | ], 137 | dtype=torch.long, 138 | ).repeat(3, 1, 1) 139 | if torch.any(edge_list != desired): 140 | self.fail(f"{edge_list} != {desired}") 141 | 142 | 143 | class TestDistanceEdge(unittest.TestCase): 144 | def setUp(self): 145 | B = 3 146 | T = 4 147 | feat = 5 148 | t = 2 149 | 150 | self.nodes = torch.arange(1, B * T * feat + 1).reshape(B, T, feat) 151 | self.edges = torch.tensor([], dtype=torch.long).reshape(3, 2, 0) 152 | # B, 2, T 153 | self.xs = torch.zeros(B, t, feat) 154 | 155 | self.gnn = torch_geometric.nn.Sequential( 156 | "x, edge_index", [(Sum(), "x, edge_index -> x")] 157 | ) 158 | 159 | self.edge_selector = torch_geometric.nn.Sequential( 160 | "nodes, edge_list, weights, B, T, t", 161 | [ 162 | ( 163 | EuclideanEdge(1), 164 | "nodes, edge_list, weights, B, T, t -> edge_list, weights", 165 | ) 166 | ], 167 | ) 168 | 169 | self.gcm = SparseGCM(self.gnn, edge_selectors=self.edge_selector) 170 | 171 | def test_simple(self): 172 | self.nodes[:, 0] = 0 173 | out, nodes, edge_list, weights = self.gcm(self.xs, self.nodes, self.edges, None) 174 | 175 | desired = torch.tensor( 176 | [ 177 | [4, 5, 5], 178 | [0, 0, 4], 179 | ], 180 | dtype=torch.long, 181 | ).repeat(3, 1, 1) 182 | if torch.any(edge_list != desired): 183 | self.fail(f"{edge_list} != {desired}") 184 | 185 | def test_staggered(self): 186 | self.nodes[0, 0] = 0 187 | self.nodes[1, 1] = 0 188 | self.nodes[2, 2] = 0 189 | out, nodes, edge_list, weights = self.gcm(self.xs, self.nodes, self.edges, None) 190 | 191 | desired = torch.tensor( 192 | [ 193 | [ 194 | [4, 5, 5], 195 | [0, 0, 4], 196 | ], 197 | [ 198 | [4, 5, 5], 199 | [1, 1, 4], 200 | ], 201 | [[4, 5, 5], [2, 2, 4]], 202 | ], 203 | dtype=torch.long, 204 | ) 205 | if 
torch.any(edge_list != desired): 206 | self.fail(f"{edge_list} != {desired}") 207 | 208 | 209 | if __name__ == "__main__": 210 | unittest.main() 211 | -------------------------------------------------------------------------------- /src/models/vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class VAE(nn.Module): 6 | # On CPU this takes 2.5ms to encode 7 | # on GPU, 886 microsecs 8 | def __init__(self, h_dim=2048, z_dim=128): 9 | assert h_dim in [2048] 10 | super().__init__() 11 | self.z_dim = z_dim 12 | self.encoder = nn.Sequential( 13 | nn.Conv2d(43 + 1, 128, 5, stride=3), # b, 64, 10 14 | nn.ReLU(), 15 | nn.Conv2d(128, 256, 4, stride=2), # b, 96, 4, 4 16 | nn.ReLU(), 17 | nn.Conv2d(256, 512, 3, stride=1), # b, 128, 2, 2 18 | nn.ReLU(), 19 | nn.Flatten(), 20 | ) 21 | 22 | self.fc1 = nn.Linear(h_dim, z_dim) 23 | self.fc2 = nn.Linear(h_dim, z_dim) 24 | 25 | self.fc3 = nn.Linear(z_dim, h_dim) 26 | 27 | self.decoder = nn.Sequential( 28 | nn.Unflatten(1, (h_dim // 4, 2, 2)), # TODO: This should be variable 29 | nn.ConvTranspose2d(512, 256, 3, stride=1), # b, 128, 2, 2 30 | nn.ReLU(), 31 | nn.ConvTranspose2d(256, 128, 4, stride=2), # b, 96, 4, 4 32 | nn.ReLU(), 33 | nn.ConvTranspose2d(128, 43 + 1, 5, stride=3), # b, 64, 10 34 | nn.Sigmoid(), 35 | ) 36 | 37 | def reparameterize(self, mu, logvar): 38 | std = logvar.mul(0.5).exp_().to(logvar.device) 39 | # return torch.normal(mu, std) 40 | esp = torch.randn(*mu.size()).to(mu.device) 41 | z = mu + std * esp 42 | return z 43 | 44 | def bottleneck(self, h): 45 | mu, logvar = self.fc1(h), self.fc2(h) 46 | z = self.reparameterize(mu, logvar) 47 | return z, mu, logvar 48 | 49 | def encode(self, x): 50 | h = self.encoder(x) 51 | z, mu, logvar = self.bottleneck(h) 52 | return z, mu, logvar 53 | 54 | def decode(self, z): 55 | z = self.fc3(z) 56 | z = self.decoder(z) 57 | return z 58 | 59 | def forward(self, x): 60 | z, mu, logvar = self.encode(x) 61 | z = self.decode(z) 62 | return z, mu, logvar 63 | 64 | def kld_loss(self, mu, logvar): 65 | return -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp()) 66 | 67 | def loss_fn(self, recon_x, x, mu, logvar): 68 | BCE = nn.functional.binary_cross_entropy(recon_x, x, size_average=False) 69 | # BCE = F.mse_loss(recon_x, x, size_average=False) 70 | 71 | # see Appendix B from VAE paper: 72 | # Kingma and Welling. Auto-Encoding Variational Bayes. 
ICLR, 2014 73 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 74 | KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp()) 75 | 76 | return BCE + KLD, BCE, KLD 77 | -------------------------------------------------------------------------------- /src/models/visualize_grad.py: -------------------------------------------------------------------------------- 1 | from graphviz import Digraph 2 | import torch 3 | from torch.autograd import Variable, Function 4 | 5 | 6 | def iter_graph(root, callback): 7 | queue = [root] 8 | seen = set() 9 | while queue: 10 | fn = queue.pop() 11 | if fn in seen: 12 | continue 13 | seen.add(fn) 14 | for next_fn, _ in fn.next_functions: 15 | if next_fn is not None: 16 | queue.append(next_fn) 17 | callback(fn) 18 | 19 | 20 | def register_hooks(var): 21 | fn_dict = {} 22 | 23 | def hook_cb(fn): 24 | def register_grad(grad_input, grad_output): 25 | fn_dict[fn] = grad_input 26 | 27 | fn.register_hook(register_grad) 28 | 29 | iter_graph(var.grad_fn, hook_cb) 30 | 31 | def is_bad_grad(grad_output): 32 | grad_output = grad_output.data 33 | return grad_output.ne(grad_output).any() or grad_output.gt(1e6).any() 34 | 35 | def make_dot(): 36 | node_attr = dict( 37 | style="filled", 38 | shape="box", 39 | align="left", 40 | fontsize="12", 41 | ranksep="0.1", 42 | height="0.2", 43 | ) 44 | dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) 45 | 46 | def size_to_str(size): 47 | return "(" + (", ").join(map(str, size)) + ")" 48 | 49 | def build_graph(fn): 50 | if hasattr(fn, "variable"): # if GradAccumulator 51 | u = fn.variable 52 | node_name = "Variable\n " + size_to_str(u.size()) 53 | dot.node(str(id(u)), node_name, fillcolor="lightblue") 54 | else: 55 | assert fn in fn_dict, fn 56 | fillcolor = "white" 57 | if any(is_bad_grad(gi) for gi in fn_dict[fn]): 58 | fillcolor = "red" 59 | dot.node(str(id(fn)), str(type(fn).__name__), fillcolor=fillcolor) 60 | for next_fn, _ in fn.next_functions: 61 | if next_fn is not None: 62 | next_id = id(getattr(next_fn, "variable", next_fn)) 63 | dot.edge(str(next_id), str(id(fn))) 64 | 65 | iter_graph(var.grad_fn, build_graph) 66 | 67 | return dot 68 | 69 | return make_dot 70 | 71 | 72 | def save_grads(z): 73 | z = z.sum() 74 | get_dot = register_hooks(z) 75 | z.backward() 76 | dot = get_dot() 77 | dot.save("tmp.dot") 78 | -------------------------------------------------------------------------------- /src/plot/cartpole.py: -------------------------------------------------------------------------------- 1 | { 2 | "x": "agent_timesteps_total", 3 | "x_label": "Training Timestep", 4 | "y_label": "Mean Reward per Train Batch", 5 | "y": "episode_reward_mean", 6 | "ci": 90, 7 | "range": (20, 205), 8 | "title": None, 9 | "smooth": 10, 10 | "group_category": "$|z|$", 11 | "trial_category": "Memory Module", 12 | "num_samples": 500, 13 | "output": "/tmp/plots/cartpole.pdf", 14 | "legend_offset": 0.85, 15 | "limit_line": 195, 16 | "use_latex": True, 17 | 18 | "experiment_groups": [ 19 | { 20 | "group_title": "$8$", 21 | "replay": 0, 22 | "data_prefix": "/Users/smorad/data/corl_2021_exp/cartpole/h8/", 23 | "data": [ 24 | { 25 | "title": "MLP", 26 | "trial_paths": ["mlp/*/progress.csv"] 27 | }, 28 | { 29 | "title": "LSTM", 30 | "trial_paths": ["lstm/*/progress.csv"] 31 | }, 32 | { 33 | "title": "GTrXL", 34 | "trial_paths": ["gtrxl/*/progress.csv"] 35 | }, 36 | { 37 | "title": "DNC", 38 | "trial_paths": ["dnc/*/progress.csv"] 39 | }, 40 | { 41 | "title": "GCM", 42 | "trial_paths": ["gcm/*/progress.csv"], 43 | }, 44 | { 45 | "title": 
"GTrXL $(t-2)$", 46 | "trial_paths": ["gtrxl_2t/*/progress.csv"] 47 | } 48 | ] 49 | }, 50 | { 51 | "group_title": "$16$", 52 | "replay": 0, 53 | "data_prefix": "/Users/smorad/data/corl_2021_exp/cartpole/h16/", 54 | "data": [ 55 | { 56 | "title": "MLP", 57 | "trial_paths": ["mlp/*/progress.csv"] 58 | }, 59 | { 60 | "title": "LSTM", 61 | "trial_paths": ["lstm/*/progress.csv"] 62 | }, 63 | { 64 | "title": "GTrXL", 65 | "trial_paths": ["gtrxl/*/progress.csv"] 66 | }, 67 | { 68 | "title": "DNC", 69 | "trial_paths": ["dnc/*/progress.csv"] 70 | }, 71 | { 72 | "title": "GCM", 73 | "trial_paths": ["gcm/*/progress.csv"], 74 | }, 75 | { 76 | "title": "GTrXL $(t-2)$", 77 | "trial_paths": ["gtrxl_2t/*/progress.csv"] 78 | } 79 | ] 80 | }, 81 | { 82 | "group_title": "$32$", 83 | "replay": 0, 84 | "data_prefix": "/Users/smorad/data/corl_2021_exp/cartpole/h32/", 85 | "data": [ 86 | { 87 | "title": "MLP", 88 | "trial_paths": ["mlp/*/progress.csv"] 89 | }, 90 | { 91 | "title": "LSTM", 92 | "trial_paths": ["lstm/*/progress.csv"] 93 | }, 94 | { 95 | "title": "GTrXL", 96 | "trial_paths": ["gtrxl/*/progress.csv"] 97 | }, 98 | { 99 | "title": "DNC", 100 | "trial_paths": ["dnc/*/progress.csv"] 101 | }, 102 | { 103 | "title": "GCM", 104 | "trial_paths": ["gcm/*/progress.csv"], 105 | }, 106 | { 107 | "title": "GTrXL $(t-2)$", 108 | "trial_paths": ["gtrxl_2t/*/progress.csv"] 109 | } 110 | ] 111 | }, 112 | ] 113 | } 114 | -------------------------------------------------------------------------------- /src/plot/memory.py: -------------------------------------------------------------------------------- 1 | { 2 | "x": "timesteps_total", 3 | "x_label": "Training Timestep", 4 | "y_label": "Mean Reward per Train Batch", 5 | "y": "episode_reward_mean", 6 | "ci": 90, 7 | "range": (0.15, 1.05), 8 | "title": None, 9 | "smooth": 10, 10 | "group_category": "$n$", 11 | "trial_category": "Memory Module", 12 | "num_samples": 500, 13 | "output": "/tmp/plots/memory.pdf", 14 | "legend_offset": 0.88, 15 | "limit_line": None, 16 | "use_latex": True, 17 | 18 | # Each experiment group has its own plot 19 | "experiment_groups": [ 20 | { 21 | "group_title": "$16$", 22 | "replay": 0, 23 | "data_prefix": "/Users/smorad/data/corl_2021_exp/memory/8_cards/", 24 | # Each data is a collection of trials 25 | # Each trial is just an identical run, from which we compute mean/stddev 26 | "data": [ 27 | { 28 | "title": "MLP", 29 | "trial_paths": ["mlp/*/progress.csv"] 30 | }, 31 | { 32 | "title": "LSTM", 33 | "trial_paths": ["lstm/*/progress.csv"] 34 | }, 35 | { 36 | "title": "GTrXL", 37 | "trial_paths": ["gtrxl/*/progress.csv"] 38 | }, 39 | { 40 | "title": "DNC", 41 | "trial_paths": ["dnc/*/progress.csv"] 42 | }, 43 | { 44 | "title": "GCM", 45 | "trial_paths": ["gcm/*/progress.csv"], 46 | }, 47 | ] 48 | }, 49 | { 50 | "group_title": "$20$", 51 | "replay": 0, 52 | "data_prefix": "/Users/smorad/data/corl_2021_exp/memory/10_cards/", 53 | # Each data is a collection of trials 54 | # Each trial is just an identical run, from which we compute mean/stddev 55 | "data": [ 56 | { 57 | "title": "MLP", 58 | "trial_paths": ["mlp/*/progress.csv"] 59 | }, 60 | { 61 | "title": "LSTM", 62 | "trial_paths": ["lstm/*/progress.csv"] 63 | }, 64 | { 65 | "title": "GTrXL", 66 | "trial_paths": ["gtrxl/*/progress.csv"] 67 | }, 68 | { 69 | "title": "DNC", 70 | "trial_paths": ["dnc/*/progress.csv"] 71 | }, 72 | { 73 | "title": "GCM", 74 | "trial_paths": ["gcm/*/progress.csv"], 75 | }, 76 | ] 77 | }, 78 | { 79 | "group_title": "$24$", 80 | "replay": 0, 81 | "data_prefix": 
"/Users/smorad/data/corl_2021_exp/memory/12_cards/", 82 | # Each data is a collection of trials 83 | # Each trial is just an identical run, from which we compute mean/stddev 84 | "data": [ 85 | { 86 | "title": "MLP", 87 | "trial_paths": ["mlp/*/progress.csv"] 88 | }, 89 | { 90 | "title": "LSTM", 91 | "trial_paths": ["lstm/*/progress.csv"] 92 | }, 93 | { 94 | "title": "GTrXL", 95 | "trial_paths": ["gtrxl/*/progress.csv"] 96 | }, 97 | { 98 | "title": "DNC", 99 | "trial_paths": ["dnc/*/progress.csv"] 100 | }, 101 | { 102 | "title": "GCM", 103 | "trial_paths": ["gcm/*/progress.csv"], 104 | }, 105 | ] 106 | }, 107 | ] 108 | } 109 | -------------------------------------------------------------------------------- /src/plot/nav.py: -------------------------------------------------------------------------------- 1 | { 2 | "x": "timesteps_total", 3 | "x_label": "Training Timestep", 4 | "y_label": "Mean Reward per Train Batch", 5 | "y": "episode_reward_mean", 6 | "ci": 90, 7 | "range": (0.12, 0.52), 8 | "title": None, 9 | "smooth": 10, 10 | "group_category": "$|z|$", 11 | "trial_category": "Memory Module", 12 | "num_samples": 500, 13 | "output": "/tmp/plots/nav.pdf", 14 | "legend_offset": 0.88, 15 | "limit_line": None, 16 | "use_latex": True, 17 | 18 | "experiment_groups": [ 19 | { 20 | "group_title": "$8$", 21 | "replay": 1, 22 | "data_prefix": "/Users/smorad/data/corl_2021_exp/navigation/h8/", 23 | "data": [ 24 | { 25 | "title": "MLP", 26 | "trial_paths": ["mlp/*/progress.csv"] 27 | }, 28 | { 29 | "title": "LSTM", 30 | "trial_paths": ["lstm/*/progress.csv"] 31 | }, 32 | { 33 | "title": "GTrXL", 34 | "trial_paths": ["gtrxl_1t/*/progress.csv"] 35 | }, 36 | { 37 | "title": "DNC", 38 | "trial_paths": ["dnc/*/progress.csv"] 39 | }, 40 | { 41 | "title": "GCM", 42 | "trial_paths": ["gcm/*/progress.csv"], 43 | }, 44 | ] 45 | }, 46 | { 47 | "group_title": "$16$", 48 | "replay": 1, 49 | "data_prefix": "/Users/smorad/data/corl_2021_exp/navigation/h16/", 50 | "data": [ 51 | { 52 | "title": "MLP", 53 | "trial_paths": ["mlp/*/progress.csv"] 54 | }, 55 | { 56 | "title": "LSTM", 57 | "trial_paths": ["lstm/*/progress.csv"] 58 | }, 59 | { 60 | "title": "GTrXL", 61 | "trial_paths": ["gtrxl_1t/*/progress.csv"] 62 | }, 63 | { 64 | "title": "DNC", 65 | "trial_paths": ["dnc/*/progress.csv"] 66 | }, 67 | { 68 | "title": "GCM", 69 | "trial_paths": ["gcm/*/progress.csv"], 70 | }, 71 | ] 72 | }, 73 | { 74 | "group_title": "$32$", 75 | "replay": 1, 76 | "data_prefix": "/Users/smorad/data/corl_2021_exp/navigation/h32/", 77 | "data": [ 78 | { 79 | "title": "MLP", 80 | "trial_paths": ["mlp/*/progress.csv"] 81 | }, 82 | { 83 | "title": "LSTM", 84 | "trial_paths": ["lstm/*/progress.csv"] 85 | }, 86 | { 87 | "title": "GTrXL", 88 | "trial_paths": ["gtrxl_1t/*/progress.csv"] 89 | }, 90 | { 91 | "title": "DNC", 92 | "trial_paths": ["dnc/*/progress.csv"] 93 | }, 94 | { 95 | "title": "GCM", 96 | "trial_paths": ["gcm/*/progress.csv"], 97 | }, 98 | ] 99 | }, 100 | ] 101 | } 102 | -------------------------------------------------------------------------------- /src/plot/nav_sweep.py: -------------------------------------------------------------------------------- 1 | { 2 | "x": "timesteps_total", 3 | "x_label": "Training Timestep", 4 | "y_label": "Mean Reward per Train Batch", 5 | "y": "episode_reward_mean", 6 | "ci": 90, 7 | "range": (0.12, 0.52), 8 | "title": None, 9 | "smooth": 10, 10 | "group_category": "$|z|$", 11 | "trial_category": "GCM Prior", 12 | "num_samples": 500, 13 | "output": "/tmp/plots/gcm_sweep.pdf", 14 | "legend_offset": 
0.87, 15 | "limit_line": None, 16 | "use_latex": True, 17 | 18 | "experiment_groups": [ 19 | { 20 | "group_title": "$8$", 21 | "replay": 1, 22 | "data_prefix": "/Users/smorad/data/corl_2021_exp/navigation_sweep/h8/", 23 | "data": [ 24 | { 25 | "title": "None", 26 | "trial_paths": ["none/*/progress.csv"], 27 | }, 28 | { 29 | "title": "Temporal", 30 | "trial_paths": ["temporal/*/progress.csv"] 31 | }, 32 | { 33 | "title": "Spatial", 34 | "trial_paths": ["spatial/*/progress.csv"] 35 | }, 36 | { 37 | "title": "Latent Sim.", 38 | "trial_paths": ["vae/*/progress.csv"] 39 | }, 40 | ] 41 | }, 42 | { 43 | "group_title": "$16$", 44 | "replay": 1, 45 | "data_prefix": "/Users/smorad/data/corl_2021_exp/navigation_sweep/h16/", 46 | "data": [ 47 | { 48 | "title": "None", 49 | "trial_paths": ["none/*/progress.csv"], 50 | }, 51 | { 52 | "title": "Temporal", 53 | "trial_paths": ["temporal/*/progress.csv"] 54 | }, 55 | { 56 | "title": "Spatial", 57 | "trial_paths": ["spatial/*/progress.csv"] 58 | }, 59 | { 60 | "title": "Latent Sim.", 61 | "trial_paths": ["vae/*/progress.csv"] 62 | }, 63 | ] 64 | }, 65 | { 66 | "group_title": "$32$", 67 | "replay": 1, 68 | "data_prefix": "/Users/smorad/data/corl_2021_exp/navigation_sweep/h32/", 69 | "data": [ 70 | { 71 | "title": "None", 72 | "trial_paths": ["none/*/progress.csv"], 73 | }, 74 | { 75 | "title": "Temporal", 76 | "trial_paths": ["temporal/*/progress.csv"] 77 | }, 78 | { 79 | "title": "Spatial", 80 | "trial_paths": ["spatial/*/progress.csv"] 81 | }, 82 | { 83 | "title": "Latent Sim.", 84 | "trial_paths": ["vae/*/progress.csv"] 85 | }, 86 | ] 87 | }, 88 | ] 89 | } 90 | -------------------------------------------------------------------------------- /src/policies/random_policy.py: -------------------------------------------------------------------------------- 1 | from ray.rllib.policy import Policy 2 | 3 | 4 | class RandomPolicy(Policy): 5 | """Example of a custom policy written from scratch. 6 | 7 | You might find it more convenient to use the `build_tf_policy` and 8 | `build_torch_policy` helpers instead for a real policy, which are 9 | described in the next sections. 
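    A rough usage sketch (the spaces, config, and observation batch are
    placeholders):

        policy = RandomPolicy(obs_space, action_space, config={})
        actions, state_outs, info = policy.compute_actions(
            obs_batch, state_batches=[]
        )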
10 | """ 11 | 12 | def __init__(self, observation_space, action_space, config): 13 | Policy.__init__(self, observation_space, action_space, config) 14 | 15 | def compute_actions( 16 | self, 17 | obs_batch, 18 | state_batches, 19 | prev_action_batch=None, 20 | prev_reward_batch=None, 21 | info_batch=None, 22 | episodes=None, 23 | **kwargs 24 | ): 25 | # return action batch, RNN states, extra values to include in batch 26 | return [self.action_space.sample() for _ in obs_batch], [], {} 27 | 28 | def learn_on_batch(self, samples): 29 | # implement your learning code here 30 | return {} # return stats 31 | 32 | def get_weights(self): 33 | return {} 34 | 35 | def set_weights(self, weights): 36 | pass 37 | -------------------------------------------------------------------------------- /src/preprocessors/autoencoder/dataset_collector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import numpy as np 4 | from habitat_baselines.common.obs_transformers import ObservationTransformer 5 | 6 | from models.vae import VAE 7 | from semantic_colors import COLORS64 8 | 9 | 10 | class DataCollector(ObservationTransformer): 11 | def __init__(self, env, num_cats=43, batch_size=8192, num_batches=16): 12 | super().__init__() 13 | self.dtype = np.float32 14 | self.env = env 15 | self.fwd_ctr = 0 16 | self.batch_size = batch_size 17 | self.num_batches = num_batches 18 | self.storage = np.zeros((batch_size, 43 + 1, 32, 32)) 19 | 20 | def transform_observation_space(self, obs_space): 21 | assert ( 22 | "semantic" in obs_space.spaces and "depth" in obs_space.spaces 23 | ), "VAE requires depth and semantic images" 24 | # Pop these so rllib doesn't spend forever training with them 25 | obs_space.spaces.pop("semantic", None) 26 | obs_space.spaces.pop("depth", None) 27 | return obs_space 28 | 29 | def forward(self, obs): 30 | self.in_sem = obs.pop("semantic", None) 31 | self.in_depth = obs.pop("depth", None) 32 | img = self.to_img(self.in_sem, self.in_depth) 33 | self.storage[self.fwd_ctr % self.batch_size] = img 34 | self.fwd_ctr += 1 35 | 36 | if self.fwd_ctr != 0 and self.fwd_ctr % self.batch_size == 0: 37 | torch.save(self.storage, f"img_batch_{self.fwd_ctr // self.batch_size}") 38 | self.storage = np.zeros((self.batch_size, 43 + 1, 32, 32)) 39 | 40 | if self.fwd_ctr >= self.batch_size * self.num_batches: 41 | raise Exception("Done") 42 | 43 | return obs 44 | 45 | def to_img(self, semantic, depth): 46 | """Build obs into an image tensor for feeding to nn""" 47 | # [batch, channel, cols, rows] 48 | channels, cols, rows = ( # type: ignore 49 | semantic.shape[0] + 1, 50 | *semantic.shape[1:], # type : ignore 51 | ) 52 | 53 | depth_channel = semantic.shape[0] 54 | x = torch.zeros( 55 | (channels, cols, rows), 56 | dtype=torch.float32, 57 | ) 58 | x[0:depth_channel] = torch.Tensor(semantic) 59 | x[depth_channel] = torch.Tensor(depth).squeeze() 60 | return x.unsqueeze(0) 61 | 62 | def from_config(cls, config): 63 | # TODO: Figure out if we need this 64 | raise NotImplementedError() 65 | -------------------------------------------------------------------------------- /src/preprocessors/autoencoder/ppae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import cv2 4 | import numpy as np 5 | from habitat_baselines.common.obs_transformers import ObservationTransformer 6 | 7 | from semantic_colors import COLORS64 8 | 9 | # TODO: This file needs to be cleaned up 10 | 11 | 12 | class 
PPAE(ObservationTransformer): 13 | def __init__(self, env, cpt_path="/root/vnav/ae.pt"): 14 | super().__init__() 15 | self.dtype = np.float32 16 | self.env = env 17 | self.net = torch.load(cpt_path).to("cpu") 18 | # self.net.load_state_dict(cpt['model_state_dict']) 19 | self.net.eval() 20 | 21 | def transform_observation_space(self, obs_space): 22 | assert ( 23 | "semantic" in obs_space.spaces and "depth" in obs_space.spaces 24 | ), "AE requires depth and semantic images" 25 | self.sensor_shape = np.array( 26 | ( 27 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.HEIGHT, 28 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.WIDTH, 29 | ), 30 | dtype=np.int32, 31 | ) 32 | self.shape = (self.net.h_dim,) 33 | obs_space.spaces.pop("semantic", None) 34 | obs_space.spaces.pop("depth", None) 35 | obs_space.spaces["ae"] = gym.spaces.Box( 36 | shape=self.shape, 37 | dtype=self.dtype, 38 | high=np.finfo(self.dtype).max, 39 | low=np.finfo(self.dtype).min, 40 | ) 41 | return obs_space 42 | 43 | def forward(self, obs): 44 | self.in_sem = obs.pop("semantic", None) 45 | self.in_depth = obs.pop("depth", None) 46 | assert ( 47 | self.in_sem.shape[1:] == self.in_depth.shape 48 | ), "Semantic and depth must have the same shape to use the vae preprocessor" 49 | with torch.no_grad(): 50 | img = self.to_img(self.in_sem, self.in_depth) 51 | self.z = self.net.encode(img) 52 | obs["ae"] = self.z.squeeze().numpy() 53 | return obs 54 | 55 | def reconstruct(self): 56 | recon = self.net.decode(self.z) 57 | out_sem = recon[0, :-1, :, :].detach().numpy() 58 | out_depth = recon[0, -1, :, :].detach().numpy() 59 | return out_depth, out_sem 60 | 61 | def to_img(self, semantic, depth): 62 | """Build obs into an image tensor for feeding to nn""" 63 | # [batch, channel, cols, rows] 64 | channels, cols, rows = ( # type: ignore 65 | semantic.shape[0] + 1, 66 | *semantic.shape[1:], # type : ignore 67 | ) 68 | 69 | depth_channel = semantic.shape[0] 70 | x = torch.zeros( 71 | (channels, cols, rows), 72 | dtype=torch.float32, 73 | ) 74 | x[0:depth_channel] = torch.Tensor(semantic) 75 | x[depth_channel] = torch.Tensor(depth).squeeze() 76 | return x.unsqueeze(0) 77 | 78 | def visualize(self): 79 | # TODO: Clean this up 80 | out_depth, out_sem = self.reconstruct() 81 | img = np.zeros((*out_depth.shape, 3), dtype=np.uint8) 82 | # To single channel where px value is semantic class 83 | max_likelihood_px = out_sem.argmax(axis=0) 84 | # Convert to RGB 85 | img = COLORS64[max_likelihood_px.flat].reshape(*max_likelihood_px.shape, 3) 86 | sem_out = cv2.resize( 87 | img, tuple(self.sensor_shape), interpolation=cv2.INTER_NEAREST 88 | ).astype(np.uint8) 89 | 90 | depth_out = ( 91 | 255 * cv2.resize(out_depth, tuple(self.sensor_shape), cv2.INTER_NEAREST) 92 | ).astype(np.uint8) 93 | depth_out = np.stack((depth_out,) * 3, axis=-1) 94 | 95 | return cv2.hconcat([sem_out, depth_out]) 96 | 97 | def from_config(cls, config): 98 | # TODO: Figure out if we need this 99 | raise NotImplementedError() 100 | -------------------------------------------------------------------------------- /src/preprocessors/autoencoder/ppd_vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import cv2 4 | import numpy as np 5 | from habitat_baselines.common.obs_transformers import ObservationTransformer 6 | 7 | from models.depth_vae import DepthVAE 8 | 9 | # TODO: This file needs to be cleaned up 10 | 11 | 12 | class PPDepthVAE(ObservationTransformer): 13 | def __init__(self, env, cpt_path="/root/vnav/depth_vae.pt"): 
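# Loads a pretrained depth VAE checkpoint onto the CPU in eval mode;
# forward() below then swaps the raw "rgb"/"depth" observations for the
# z_dim latent vector stored under obs["vae"].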
14 | super().__init__() 15 | self.dtype = np.float32 16 | self.env = env 17 | self.net = torch.load(cpt_path).to("cpu") 18 | # self.net.load_state_dict(cpt['model_state_dict']) 19 | self.net.eval() 20 | 21 | def transform_observation_space(self, obs_space): 22 | assert "depth" in obs_space.spaces, "VAE requires rgb and depth images" 23 | self.sensor_shape = np.array( 24 | ( 25 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.HEIGHT, 26 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.WIDTH, 27 | ), 28 | dtype=np.int32, 29 | ) 30 | self.shape = (self.net.z_dim,) 31 | obs_space.spaces.pop("rgb", None) 32 | obs_space.spaces.pop("depth", None) 33 | obs_space.spaces["vae"] = gym.spaces.Box( 34 | shape=self.shape, 35 | dtype=self.dtype, 36 | high=np.finfo(self.dtype).max, 37 | low=np.finfo(self.dtype).min, 38 | ) 39 | return obs_space 40 | 41 | def forward(self, obs): 42 | obs.pop("rgb", None) 43 | self.in_depth = obs.pop("depth", None) 44 | with torch.no_grad(): 45 | img = self.to_img(self.in_depth) 46 | self.z, self.mu, self.logvar = self.net.encode(img) 47 | obs["vae"] = self.z.squeeze().numpy() 48 | return obs 49 | 50 | def reconstruct(self): 51 | recon = self.net.decode(self.z) 52 | out_depth = recon[0, 0, :, :].detach().numpy() 53 | return out_depth 54 | 55 | def to_img(self, depth): 56 | """Build obs into an image tensor for feeding to nn""" 57 | dep = torch.from_numpy(depth) 58 | 59 | # result: [h, w, 4] 60 | x = dep.permute(2, 0, 1) 61 | assert x.max() <= 1.0 and x.min() >= 0.0 62 | # [batch, channel, cols, rows] 63 | return x.unsqueeze(0) 64 | 65 | def visualize(self, scale=4): 66 | # TODO: Clean this up 67 | out_depth = self.reconstruct() 68 | depth_out = ( 69 | cv2.resize( 70 | 255 * out_depth, tuple(scale * self.sensor_shape), cv2.INTER_NEAREST 71 | ) 72 | ).astype(np.uint8) 73 | depth_out = np.stack((depth_out,) * 3, axis=-1) 74 | 75 | return depth_out 76 | 77 | def from_config(cls, config): 78 | # TODO: Figure out if we need this 79 | raise NotImplementedError() 80 | -------------------------------------------------------------------------------- /src/preprocessors/autoencoder/pprgbd_vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import cv2 4 | import numpy as np 5 | from habitat_baselines.common.obs_transformers import ObservationTransformer 6 | 7 | from models.rgbd_vae import RGBDVAE 8 | 9 | # TODO: This file needs to be cleaned up 10 | 11 | 12 | class PPRGBDVAE(ObservationTransformer): 13 | def __init__(self, env, cpt_path="/root/vnav/rgbd_vae.pt"): 14 | super().__init__() 15 | self.dtype = np.float32 16 | self.env = env 17 | self.net = torch.load(cpt_path).to("cpu") 18 | # self.net.load_state_dict(cpt['model_state_dict']) 19 | self.net.eval() 20 | 21 | def transform_observation_space(self, obs_space): 22 | assert ( 23 | "rgb" in obs_space.spaces and "depth" in obs_space.spaces 24 | ), "VAE requires rgb and depth images" 25 | self.sensor_shape = np.array( 26 | ( 27 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.HEIGHT, 28 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.WIDTH, 29 | ), 30 | dtype=np.int32, 31 | ) 32 | self.shape = (self.net.z_dim,) 33 | obs_space.spaces.pop("rgb", None) 34 | obs_space.spaces.pop("depth", None) 35 | obs_space.spaces["vae"] = gym.spaces.Box( 36 | shape=self.shape, 37 | dtype=self.dtype, 38 | high=np.finfo(self.dtype).max, 39 | low=np.finfo(self.dtype).min, 40 | ) 41 | return obs_space 42 | 43 | def forward(self, obs): 44 | self.in_rgb = obs.pop("rgb", None) 45 | self.in_depth = obs.pop("depth", 
None) 46 | assert ( 47 | self.in_rgb.shape[:-1] == self.in_depth.shape[:-1] 48 | ), "RGB and depth must have the same shape to use the vae preprocessor" 49 | with torch.no_grad(): 50 | img = self.to_img(self.in_rgb, self.in_depth) 51 | self.z, self.mu, self.logvar = self.net.encode(img) 52 | obs["vae"] = self.z.squeeze().numpy() 53 | return obs 54 | 55 | def reconstruct(self): 56 | recon = self.net.decode(self.z) 57 | out_rgb = recon[0, :-1, :, :].detach().numpy() 58 | out_depth = recon[0, -1, :, :].detach().numpy() 59 | return out_depth, out_rgb 60 | 61 | def to_img(self, semantic, depth): 62 | """Build obs into an image tensor for feeding to nn""" 63 | sem = torch.from_numpy(semantic) / 255.0 64 | dep = torch.from_numpy(depth) 65 | 66 | x = torch.cat((sem, dep), dim=-1) 67 | x = x.permute(2, 0, 1) 68 | assert x.max() <= 1.0 and x.min() >= 0.0 69 | # [batch, channel, cols, rows] 70 | return x.unsqueeze(0) 71 | 72 | def visualize(self): 73 | # TODO: Clean this up 74 | out_depth, out_rgb = self.reconstruct() 75 | rgb_out = 255 * cv2.resize( 76 | out_rgb, tuple(self.sensor_shape), interpolation=cv2.INTER_NEAREST 77 | ).astype(np.uint8) 78 | 79 | depth_out = ( 80 | 255 * cv2.resize(out_depth, tuple(self.sensor_shape), cv2.INTER_NEAREST) 81 | ).astype(np.uint8) 82 | depth_out = np.stack((depth_out,) * 3, axis=-1) 83 | 84 | return cv2.hconcat([rgb_out, depth_out]) 85 | 86 | def from_config(cls, config): 87 | # TODO: Figure out if we need this 88 | raise NotImplementedError() 89 | -------------------------------------------------------------------------------- /src/preprocessors/autoencoder/train_vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import numpy as np 4 | from habitat_baselines.common.obs_transformers import ObservationTransformer 5 | 6 | from models.vae import VAE 7 | from semantic_colors import COLORS64 8 | 9 | # TODO: This file needs to be cleaned up 10 | 11 | 12 | class VAETrainer(ObservationTransformer): 13 | def __init__(self, env, num_cats=43, height_fac=10, width_fac=10): 14 | super().__init__() 15 | self.dtype = np.float32 16 | self.env = env 17 | self.nn = VAE() 18 | learning_rate = 1e-4 19 | self.optimizer = torch.optim.Adam(self.nn.parameters(), lr=learning_rate) 20 | self.losses = [] 21 | 22 | def transform_observation_space(self, obs_space): 23 | assert ( 24 | "semantic" in obs_space.spaces and "depth" in obs_space.spaces 25 | ), "VAE requires depth and semantic images" 26 | self.sensor_shape = np.array( 27 | ( 28 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.HEIGHT, 29 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.WIDTH, 30 | ), 31 | dtype=np.int32, 32 | ) 33 | # Pop these so rllib doesn't spend forever training with them 34 | obs_space.spaces.pop("semantic", None) 35 | obs_space.spaces.pop("depth", None) 36 | return obs_space 37 | 38 | def backprop(self, in_sem, in_depth, out_sem, out_depth, depth_to_sem_ratio=1 / 4): 39 | sem_factor = 1 / in_sem.shape[1] 40 | MSE_sem = ( 41 | torch.nn.functional.mse_loss(in_sem, out_sem) 42 | * (1 / depth_to_sem_ratio) 43 | * sem_factor 44 | ) 45 | MSE_depth = torch.nn.functional.mse_loss(in_depth, out_depth) 46 | KLD = -0.5 * torch.mean(1 + self.logvar - self.mu.pow(2) - self.logvar.exp()) 47 | loss = MSE_sem + MSE_depth + KLD 48 | self.losses.append(loss.item()) 49 | if len(self.losses) == 100: 50 | print("Mean loss:", np.mean(self.losses)) 51 | self.losses.clear() 52 | self.optimizer.zero_grad() 53 | loss.backward() 54 | self.optimizer.step() 55 | 56 | def 
forward(self, obs): 57 | self.in_sem = obs.pop("semantic", None) 58 | self.in_depth = obs.pop("depth", None) 59 | img = self.to_img(self.in_sem, self.in_depth) 60 | recon, self.mu, self.logvar = self.nn(img) 61 | self.out_sem = recon[0, :-1, :, :] 62 | self.out_depth = recon[0, -1, :, :] 63 | self.optimizer.zero_grad() 64 | self.backprop( 65 | torch.Tensor(self.in_sem), 66 | torch.Tensor(self.in_depth), 67 | self.out_sem, 68 | self.out_depth, 69 | ) 70 | return obs 71 | 72 | def to_img(self, semantic, depth): 73 | """Build obs into an image tensor for feeding to nn""" 74 | # [batch, channel, cols, rows] 75 | channels, cols, rows = ( # type: ignore 76 | semantic.shape[0] + 1, 77 | *semantic.shape[1:], # type : ignore 78 | ) 79 | 80 | depth_channel = semantic.shape[0] 81 | x = torch.zeros( 82 | (channels, cols, rows), 83 | dtype=torch.float32, 84 | ) 85 | x[0:depth_channel] = torch.Tensor(semantic) 86 | x[depth_channel] = torch.Tensor(depth).squeeze() 87 | return x.unsqueeze(0) 88 | 89 | def visualize(self): 90 | # TODO: Clean this up 91 | out_depth = self.out_depth.detach().numpy() 92 | out_sem = self.out_sem.detach().numpy() 93 | img = np.zeros((*out_depth.shape, 3), dtype=np.uint8) 94 | for layer_idx in range(out_sem.shape[0]): 95 | layer = out_sem[layer_idx, :, :] 96 | filled_px = np.argwhere(np.around(layer) == 1) 97 | img[filled_px[:, 0], filled_px[:, 1]] = COLORS64[layer_idx] 98 | sem_out = cv2.resize( 99 | img, tuple(self.sensor_shape), interpolation=cv2.INTER_NEAREST 100 | ) 101 | 102 | depth_out = ( 103 | 255 * cv2.resize(out_depth, tuple(self.sensor_shape), cv2.INTER_NEAREST) 104 | ).astype(np.uint8) 105 | depth_out = np.stack((depth_out,) * 3, axis=-1) 106 | 107 | return cv2.hconcat([sem_out, depth_out]) 108 | 109 | def from_config(cls, config): 110 | # TODO: Figure out if we need this 111 | raise NotImplementedError() 112 | -------------------------------------------------------------------------------- /src/preprocessors/autoencoder/vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gym 3 | import cv2 4 | import numpy as np 5 | from habitat_baselines.common.obs_transformers import ObservationTransformer 6 | 7 | from models.vae import VAE 8 | from semantic_colors import COLORS64 9 | 10 | # TODO: This file needs to be cleaned up 11 | 12 | 13 | class PPVAE(ObservationTransformer): 14 | def __init__(self, env, cpt_path="/root/vnav/vae.pt"): 15 | super().__init__() 16 | self.dtype = np.float32 17 | self.env = env 18 | self.net = torch.load(cpt_path).to("cpu") 19 | # self.net.load_state_dict(cpt['model_state_dict']) 20 | self.net.eval() 21 | 22 | def transform_observation_space(self, obs_space): 23 | assert ( 24 | "semantic" in obs_space.spaces and "depth" in obs_space.spaces 25 | ), "VAE requires depth and semantic images" 26 | self.sensor_shape = np.array( 27 | ( 28 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.HEIGHT, 29 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.WIDTH, 30 | ), 31 | dtype=np.int32, 32 | ) 33 | self.shape = (self.net.z_dim,) 34 | obs_space.spaces.pop("semantic", None) 35 | obs_space.spaces.pop("depth", None) 36 | obs_space.spaces["vae"] = gym.spaces.Box( 37 | shape=self.shape, 38 | dtype=self.dtype, 39 | high=np.finfo(self.dtype).max, 40 | low=np.finfo(self.dtype).min, 41 | ) 42 | return obs_space 43 | 44 | def forward(self, obs): 45 | self.in_sem = obs.pop("semantic", None) 46 | self.in_depth = obs.pop("depth", None) 47 | assert ( 48 | self.in_sem.shape[1:] == self.in_depth.shape 49 | ), "Semantic and 
depth must have the same shape to use the vae preprocessor" 50 | with torch.no_grad(): 51 | img = self.to_img(self.in_sem, self.in_depth) 52 | self.z, self.mu, self.logvar = self.net.encode(img) 53 | obs["vae"] = self.z.squeeze() 54 | return obs 55 | 56 | def reconstruct(self): 57 | recon = self.net.decode(self.z) 58 | out_sem = recon[0, :-1, :, :].detach().numpy() 59 | out_depth = recon[0, -1, :, :].detach().numpy() 60 | return out_depth, out_sem 61 | 62 | def to_img(self, semantic, depth): 63 | """Build obs into an image tensor for feeding to nn""" 64 | # [batch, channel, cols, rows] 65 | channels, cols, rows = ( # type: ignore 66 | semantic.shape[0] + 1, 67 | *semantic.shape[1:], # type : ignore 68 | ) 69 | 70 | depth_channel = semantic.shape[0] 71 | x = torch.zeros( 72 | (channels, cols, rows), 73 | dtype=torch.float32, 74 | ) 75 | x[0:depth_channel] = torch.Tensor(semantic) 76 | x[depth_channel] = torch.Tensor(depth).squeeze() 77 | return x.unsqueeze(0) 78 | 79 | def visualize(self): 80 | # TODO: Clean this up 81 | out_depth, out_sem = self.reconstruct() 82 | img = np.zeros((*out_depth.shape, 3), dtype=np.uint8) 83 | # To single channel where px value is semantic class 84 | max_likelihood_px = out_sem.argmax(axis=0) 85 | # Convert to RGB 86 | img = COLORS64[max_likelihood_px.flat].reshape(*max_likelihood_px.shape, 3) 87 | sem_out = cv2.resize( 88 | img, tuple(self.sensor_shape), interpolation=cv2.INTER_NEAREST 89 | ).astype(np.uint8) 90 | 91 | depth_out = ( 92 | 255 * cv2.resize(out_depth, tuple(self.sensor_shape), cv2.INTER_NEAREST) 93 | ).astype(np.uint8) 94 | depth_out = np.stack((depth_out,) * 3, axis=-1) 95 | 96 | return cv2.hconcat([sem_out, depth_out]) 97 | 98 | def from_config(cls, config): 99 | # TODO: Figure out if we need this 100 | raise NotImplementedError() 101 | -------------------------------------------------------------------------------- /src/preprocessors/compass_components.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | from habitat_baselines.common.obs_transformers import ObservationTransformer 4 | 5 | 6 | class CompassComponents(ObservationTransformer): 7 | def __init__(self, env): 8 | super().__init__() 9 | 10 | def transform_observation_space(self, obs_space): 11 | if "compass" not in obs_space.spaces: 12 | return obs_space 13 | 14 | obs_space.spaces["compass"] = gym.spaces.Box( 15 | high=1.0, 16 | low=-1.0, 17 | dtype=np.float, 18 | shape=(2, 1), 19 | ) 20 | 21 | return obs_space 22 | 23 | def forward(self, obs): 24 | if "compass" not in obs: 25 | return obs 26 | 27 | # Correct for over/underflow 28 | obs["compass"] = np.array([np.sin(obs["compass"]), np.cos(obs["compass"])]) 29 | return obs 30 | 31 | def from_config(cls, config): 32 | # TODO: Figure out if we need this 33 | raise NotImplementedError() 34 | -------------------------------------------------------------------------------- /src/preprocessors/compass_fix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from habitat_baselines.common.obs_transformers import ObservationTransformer 3 | 4 | 5 | class CompassFix(ObservationTransformer): 6 | def __init__(self, env): 7 | super().__init__() 8 | 9 | def transform_observation_space(self, obs_space): 10 | if "compass" not in obs_space.spaces: 11 | return obs_space 12 | 13 | sp = obs_space.spaces["compass"] 14 | self.low = sp.low 15 | self.high = sp.high 16 | 17 | return obs_space 18 | 19 | def forward(self, obs): 20 | 
if "compass" not in obs: 21 | return obs 22 | 23 | # Correct for over/underflow 24 | obs["compass"] = np.clip(obs["compass"], self.low, self.high) 25 | return obs 26 | 27 | def from_config(cls, config): 28 | # TODO: Figure out if we need this 29 | raise NotImplementedError() 30 | -------------------------------------------------------------------------------- /src/preprocessors/ghost_rgb.py: -------------------------------------------------------------------------------- 1 | from habitat_baselines.common.obs_transformers import ObservationTransformer 2 | 3 | 4 | class GhostRGB(ObservationTransformer): 5 | def __init__(self, env): 6 | super().__init__() 7 | 8 | def transform_observation_space(self, obs_space): 9 | obs_space.spaces.pop("rgb", None) 10 | return obs_space 11 | 12 | def forward(self, obs): 13 | obs.pop("rgb", None) 14 | return obs 15 | 16 | def from_config(cls, config): 17 | # TODO: Figure out if we need this 18 | raise NotImplementedError() 19 | -------------------------------------------------------------------------------- /src/preprocessors/nn_semantic.py: -------------------------------------------------------------------------------- 1 | import habitat 2 | import numpy as np 3 | from gym import spaces 4 | 5 | import detectron2 6 | from detectron2.utils.logger import setup_logger 7 | import cv2 8 | import random 9 | 10 | from detectron2 import model_zoo 11 | from detectron2.engine import DefaultPredictor 12 | from detectron2.config import get_cfg 13 | from detectron2.utils.visualizer import Visualizer 14 | from detectron2.data import MetadataCatalog 15 | import torch 16 | 17 | setup_logger() 18 | 19 | # Ball is coco class 32 20 | 21 | 22 | @habitat.registry.register_sensor(name="NNSemanticSensor") 23 | class NNSemanticSensor(habitat.Sensor): 24 | def init_rcnn(self): 25 | cfg = get_cfg() 26 | cfg.merge_from_file( 27 | model_zoo.get_config_file( 28 | "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" 29 | ) 30 | ) 31 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3 32 | cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url( 33 | "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" 34 | ) 35 | self.predictor = DefaultPredictor(cfg) 36 | 37 | def __init__(self, sim, config, **kwargs): 38 | self._sim = sim 39 | self.config = config 40 | self.init_rcnn() 41 | super().__init__(config=config) 42 | 43 | def _get_uuid(self, *args, **kwargs): 44 | return "nn_semantic_sensor" 45 | 46 | def _get_sensor_type(self, *args, **kwargs): 47 | return habitat.SensorTypes.SEMANTIC 48 | 49 | def _get_observation_space(self, *args, **kwargs): 50 | # We don't have access to the full config 51 | # so just make our dims the same as the color camera 52 | rgb = self._sim.config.agents[0].sensor_specifications[0] 53 | assert rgb.uuid == "rgb" 54 | dims = rgb.resolution 55 | return spaces.Box( 56 | low=0, 57 | high=1, 58 | shape=(80, *dims), 59 | dtype=np.float32, 60 | ) 61 | 62 | def get_observation(self, observations, *args, episode, **kwargs): 63 | rgb = observations["rgb"] 64 | # Format is: 65 | # Each semantic category gets a channel 66 | # Instances share semantic channels 67 | return self.predictor(rgb)["sem_seg"] 68 | -------------------------------------------------------------------------------- /src/preprocessors/noop.py: -------------------------------------------------------------------------------- 1 | from habitat_baselines.common.obs_transformers import ObservationTransformer 2 | 3 | 4 | class NoopPP(ObservationTransformer): 5 | def __init__(self, env): 6 | super().__init__() 7 | 8 | def 
transform_observation_space(self, obs_space): 9 | return obs_space 10 | 11 | def forward(self, obs): 12 | return obs 13 | 14 | def from_config(self, cfg): 15 | raise NotImplementedError() 16 | -------------------------------------------------------------------------------- /src/preprocessors/objectgoal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import spaces 3 | import cv2 4 | from habitat_baselines.common.obs_transformers import ObservationTransformer 5 | from semantic_colors import COLORS64 6 | 7 | 8 | class ObjectGoalBoolean(ObservationTransformer): 9 | def __init__(self, env): 10 | self.env = env 11 | self.dtype = np.float32 12 | super().__init__() 13 | 14 | def transform_observation_space(self, obs_space): 15 | assert ( 16 | "objectgoal" in obs_space.spaces 17 | ), "This pp requires the objectgoal sensor" 18 | assert "semantic" in obs_space.spaces, "This pp requires the semantic sensor" 19 | 20 | # TODO: more gracefully handle both sem_onehot and sem_float 21 | obs_space.spaces["target_in_view"] = spaces.Box( 22 | low=0, high=1, shape=(1,), dtype=self.dtype 23 | ) 24 | return obs_space 25 | 26 | def forward(self, obs): 27 | assert "objectgoal" in obs, "This pp requires objectgoal input" 28 | assert "semantic" in obs, "This pp requires semantic input" 29 | 30 | self.tgt_cat = obs["objectgoal"][0].copy() 31 | # IMPORTANT: This must be loaded after SemanticOneHot 32 | # as we don't know if obs[semantic] is onehot or an image 33 | # TODO: Detect if SemanticOneHot is in envs.preprocessors and 34 | # branch based on that 35 | self.tgt_found = obs["semantic"][self.tgt_cat] 36 | obs["target_in_view"] = np.array([self.tgt_found], dtype=self.dtype) 37 | return obs 38 | 39 | def from_config(cls, config): 40 | # TODO: Figure out if we need this 41 | raise NotImplementedError() 42 | 43 | def visualize(self): 44 | out = 254 * np.ones((128, 128, 3), dtype=np.uint8) 45 | tgt_label = self.env.cat_to_str[self.tgt_cat] 46 | txt0 = f"{tgt_label}({self.tgt_cat})" 47 | 48 | if self.tgt_found: 49 | txt1 = "FOUND" 50 | else: 51 | txt1 = "NOT FOUND" 52 | 53 | color = tuple(COLORS64[self.tgt_cat].tolist()) 54 | cv2.putText(out, txt0, (0, 64), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) 55 | cv2.putText(out, txt1, (0, 96), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) 56 | return out 57 | -------------------------------------------------------------------------------- /src/preprocessors/quantized_depth.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import cv2 4 | from gym import spaces 5 | from habitat_baselines.common.obs_transformers import ObservationTransformer 6 | 7 | 8 | class QuantizedDepth(ObservationTransformer): 9 | def __init__(self, env, height_fac=1, width_fac=1): 10 | self.env = env 11 | self.dtype = np.float32 12 | self.facs = np.array((height_fac, width_fac), dtype=np.int32) 13 | super().__init__() 14 | 15 | def transform_observation_space(self, obs_space): 16 | if "depth" not in obs_space.spaces: 17 | return obs_space 18 | 19 | self.sensor_shape = np.array( 20 | ( 21 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.HEIGHT, 22 | self.env.hab_cfg.SIMULATOR.DEPTH_SENSOR.WIDTH, 23 | ), 24 | dtype=np.int32, 25 | ) 26 | self.shape = self.sensor_shape // self.facs 27 | assert np.isclose( 28 | self.facs * self.shape, self.sensor_shape 29 | ).all(), "Shapes do not align, change the factors" 30 | 31 | obs_space.spaces["depth"] = spaces.Box( 32 | # 1 is far, 0 is 
near 33 | low=0, 34 | high=1, 35 | shape=self.shape, 36 | dtype=self.dtype, 37 | ) 38 | return obs_space 39 | 40 | def forward(self, obs): 41 | if "depth" not in obs: 42 | return obs 43 | 44 | depth = np.reshape(obs["depth"], (1, *self.sensor_shape)) 45 | # Rather than mean, we care about the closest object per pixel 46 | # i.e. min_pool (torch has no min_pool2d, so negate, max-pool, negate back) 47 | quant_depth = ( 48 | -torch.nn.functional.max_pool2d(-torch.FloatTensor(depth), self.facs.tolist()) 49 | .squeeze() 50 | .numpy() 51 | ) 52 | obs["depth"] = quant_depth 53 | self.quant_depth = quant_depth 54 | return obs 55 | 56 | def visualize(self): 57 | return ( 58 | 254 59 | * cv2.resize(self.quant_depth, tuple(self.sensor_shape), interpolation=cv2.INTER_NEAREST) 60 | ).astype(np.uint8) 61 | 62 | def from_config(cls, config): 63 | # TODO: Figure out if we need this 64 | raise NotImplementedError() 65 | -------------------------------------------------------------------------------- /src/preprocessors/semantic/continuous_onehot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import spaces 3 | from habitat_baselines.common.obs_transformers import ObservationTransformer 4 | 5 | 6 | class SemanticFloat(ObservationTransformer): 7 | def __init__(self, env, num_cats=43): 8 | self.env = env 9 | self.num_cats = num_cats 10 | self.dtype = np.float32 11 | super().__init__() 12 | 13 | def transform_observation_space(self, obs_space): 14 | if "semantic" not in obs_space.spaces: 15 | return obs_space 16 | 17 | self.shape = (self.num_cats,) 18 | obs_space.spaces["semantic"] = spaces.Box( 19 | low=0, high=1, shape=self.shape, dtype=self.dtype 20 | ) 21 | return obs_space 22 | 23 | def forward(self, obs): 24 | if "semantic" not in obs: 25 | return obs 26 | 27 | uniqs, counts = np.unique(obs["semantic"], return_counts=True) 28 | detected_cats = self.env.instance_to_cat[uniqs] 29 | output = np.zeros(self.shape, dtype=self.dtype) 30 | for i in range(len(detected_cats)): 31 | output[detected_cats[i]] += counts[i] 32 | output /= output.sum() 33 | obs["semantic"] = output 34 | return obs 35 | 36 | def from_config(cls, config): 37 | # TODO: Figure out if we need this 38 | raise NotImplementedError() 39 | -------------------------------------------------------------------------------- /src/preprocessors/semantic/onehot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import spaces 3 | from habitat_baselines.common.obs_transformers import ObservationTransformer 4 | 5 | 6 | class SemanticOnehot(ObservationTransformer): 7 | def __init__(self, env, num_cats=43): 8 | self.env = env 9 | self.num_cats = num_cats 10 | self.dtype = np.int32 11 | super().__init__() 12 | 13 | def transform_observation_space(self, obs_space): 14 | if "semantic" not in obs_space.spaces: 15 | return obs_space 16 | 17 | self.shape = (self.num_cats,) 18 | obs_space.spaces["semantic"] = spaces.Box( 19 | low=0, high=1, shape=self.shape, dtype=self.dtype 20 | ) 21 | return obs_space 22 | 23 | def forward(self, obs): 24 | if "semantic" not in obs: 25 | return obs 26 | 27 | uniqs = np.unique(obs["semantic"]) 28 | detected_cats = self.env.instance_to_cat[uniqs] 29 | output = np.zeros(self.shape, dtype=self.dtype) 30 | output[detected_cats] = 1 31 | obs["semantic"] = output 32 | return obs 33 | 34 | def from_config(cls, config): 35 | # TODO: Figure out if we need this 36 | raise NotImplementedError() 37 | --------------------------------------------------------------------------------
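The two semantic transformers above collapse a raw instance-ID image into a fixed-length category vector: SemanticFloat records the fraction of pixels each category occupies, while SemanticOnehot only marks which categories are visible at all. A minimal self-contained sketch of that reduction outside Habitat; the toy observation and the random instance_to_cat lookup are stand-ins for what the environment normally provides:

import numpy as np

NUM_CATS = 43
# Stand-in for env.instance_to_cat: maps scene instance IDs to category IDs.
instance_to_cat = np.random.randint(0, NUM_CATS, size=100)

# Toy 4x4 "semantic" observation of raw instance IDs.
sem_obs = np.array([[3, 3, 7, 7], [3, 3, 7, 7], [3, 3, 7, 12], [3, 3, 7, 12]])

# SemanticFloat-style reduction: fraction of pixels per category.
uniqs, counts = np.unique(sem_obs, return_counts=True)
hist = np.zeros(NUM_CATS, dtype=np.float32)
for inst, count in zip(uniqs, counts):
    hist[instance_to_cat[inst]] += count
hist /= hist.sum()

# SemanticOnehot-style reduction: which categories are in view at all.
onehot = np.zeros(NUM_CATS, dtype=np.int32)
onehot[instance_to_cat[uniqs]] = 1

Both vectors have shape (43,), matching the Box spaces declared in transform_observation_space, so downstream models can treat the semantic input as a flat feature rather than an image.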
/src/preprocessors/semantic/quantized_mesh.py: -------------------------------------------------------------------------------- 1 | import habitat 2 | import cv2 3 | import numpy as np 4 | import torch 5 | from gym import spaces 6 | import habitat_sim 7 | from habitat_baselines.common.obs_transformers import ObservationTransformer 8 | from typing import List, Any, Union, Optional, cast, Dict 9 | from semantic_colors import COLORS64 10 | 11 | 12 | class QuantizedSemanticMask(ObservationTransformer): 13 | """Downsample an incoming observation of [1,m,n] to [43, m / height_fac, n / width_fac]. This first lookups semantic categories from object IDs, then expands each unique semantic value into an image channel, then downsamples the image.""" 14 | 15 | def __init__(self, env, num_cats=43, height_fac=1, width_fac=1): 16 | self.dtype = np.float32 17 | self.num_cats = num_cats 18 | self.env = env 19 | self.facs = np.array((height_fac, width_fac), dtype=np.int32) 20 | super().__init__() 21 | 22 | def transform_observation_space(self, obs_space): 23 | if "semantic" not in obs_space.spaces: 24 | return obs_space 25 | 26 | self.sensor_shape = np.array( 27 | ( 28 | self.env.hab_cfg.SIMULATOR.SEMANTIC_SENSOR.HEIGHT, 29 | self.env.hab_cfg.SIMULATOR.SEMANTIC_SENSOR.WIDTH, 30 | ), 31 | dtype=np.int32, 32 | ) 33 | self.shape = (self.num_cats, *(self.sensor_shape // self.facs)) 34 | 35 | assert np.isclose( 36 | self.facs * self.shape[1:], self.sensor_shape 37 | ).all(), "Shapes do not align, change the factors" 38 | 39 | obs_space.spaces["semantic"] = spaces.Box( 40 | low=0, 41 | high=1, 42 | shape=self.shape, 43 | dtype=self.dtype, 44 | ) 45 | 46 | return obs_space 47 | 48 | def forward(self, obs): 49 | if "semantic" not in obs: 50 | return obs 51 | 52 | # Convert to category 53 | new_sem = self.env.convert_sem_obs(obs) 54 | # Save on computation 55 | unique_cats = np.unique(new_sem) 56 | out_img = np.zeros(self.shape) 57 | # For each unique category, build a downsized layer 58 | # Do this at the same time to reduce memory usage 59 | tmp = np.zeros((self.num_cats, *self.sensor_shape)) 60 | for cat in unique_cats: 61 | tmp[cat, :, :] = torch.Tensor(new_sem == cat) 62 | 63 | downsized = torch.nn.functional.max_pool2d( 64 | torch.Tensor(tmp), self.facs.tolist() 65 | ) 66 | out_img = downsized.squeeze().numpy() 67 | # out_img[cat,:,:] = downsized_layer.squeeze().numpy() 68 | 69 | obs["semantic"] = out_img 70 | self.downsized_sem = out_img 71 | 72 | return obs 73 | 74 | def visualize(self): 75 | img = np.zeros((*self.shape[1:], 3), dtype=np.uint8) 76 | for layer_idx in range(self.downsized_sem.shape[0]): 77 | layer = self.downsized_sem[layer_idx, :, :] 78 | filled_px = np.argwhere(layer == 1) 79 | img[filled_px[:, 0], filled_px[:, 1]] = COLORS64[layer_idx] 80 | return cv2.resize( 81 | img, tuple(self.sensor_shape), interpolation=cv2.INTER_NEAREST 82 | ) 83 | 84 | def from_config(cls, config): 85 | # TODO: Figure out if we need this 86 | raise NotImplementedError() 87 | -------------------------------------------------------------------------------- /src/profile_mvs.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import cProfile 3 | import numpy as np 4 | import time 5 | 6 | img1 = cv2.imread("img0.jpg", 0) # queryimage # left image 7 | img2 = cv2.imread("img1.jpg", 0) # trainimage # right image 8 | 9 | 10 | orb = cv2.ORB_create(nfeatures=32) 11 | bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) 12 | 13 | 14 | # create BFMatcher object 15 | 16 | # FLANN 
parameters 17 | FLANN_INDEX_KDTREE = 1 18 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5) 19 | search_params = dict(checks=50) # or pass empty dictionary 20 | flann = cv2.FlannBasedMatcher(index_params, search_params) 21 | 22 | 23 | p = cProfile.Profile() 24 | p.enable() 25 | start = time.time() 26 | # find the keypoints and descriptors with SIFT 27 | 28 | kp1, des1 = orb.detectAndCompute(img1, None) 29 | kp2, des2 = orb.detectAndCompute(img2, None) 30 | # Match descriptors. 31 | matches = flann.knnMatch(des1.astype("float32"), des2.astype("float32"), k=2) 32 | pts1 = [] 33 | pts2 = [] 34 | 35 | # ratio test as per Lowe's paper 36 | for m, n in matches: 37 | pts2.append(kp2[m.trainIdx].pt) 38 | pts1.append(kp1[m.queryIdx].pt) 39 | 40 | 41 | pts1 = np.int32(pts1[:16]) 42 | pts2 = np.int32(pts2[:16]) 43 | F, mask = cv2.findFundamentalMat(pts1, pts2, cv2.FM_LMEDS) 44 | 45 | end = time.time() 46 | print(f"Took {end - start}s") 47 | p.disable() 48 | p.print_stats() 49 | -------------------------------------------------------------------------------- /src/recall_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import random 3 | import numpy as np 4 | from collections import OrderedDict 5 | 6 | 7 | DEFAULT_CFG = { 8 | "dim": 2, 9 | "max_items": 4, 10 | "max_queries": 2, 11 | # If we always want to recall the n'th elements 12 | "deterministic": False, 13 | # Use -1 to denote a false (random) element 14 | "determinstic_idxs": [], 15 | } 16 | 17 | 18 | class RecallEnv(gym.Env): 19 | """Recall memory environment. 20 | 21 | Obs -> [is_recall: [0,1] , item: [R^n]] 22 | Action -> [seen_previously: [0,1]] 23 | """ 24 | 25 | def __init__(self, cfg={}): # dim=8, max_items=32, max_queries=8): 26 | self.cfg = dict(DEFAULT_CFG, **cfg) 27 | self.max_items = self.cfg["max_items"] 28 | self.max_queries = self.cfg["max_queries"] 29 | self.curr_t = 0 30 | self.prev_queries = [] 31 | self.prev_obs = [] 32 | self.dim = self.cfg["dim"] 33 | """ 34 | self.det = self.cfg['deterministic'] 35 | self.det_idxs = self.cfg['deterministic_idxs'] 36 | 37 | if self.det: 38 | assert len(self.det_idxs) == max_queries, "Max queries must equal len(deterministic_idxs)" 39 | """ 40 | 41 | is_recall = gym.spaces.Discrete(2) 42 | item = gym.spaces.Box( 43 | shape=(self.dim,), 44 | low=np.finfo(np.float32).min, 45 | high=np.finfo(np.float32).max, 46 | dtype=np.float32, 47 | ) 48 | timestep = gym.spaces.Discrete(self.max_items + self.max_queries + 2) 49 | self.observation_space = gym.spaces.Dict( 50 | {"is_recall": is_recall, "item": item, "timestep": timestep} 51 | ) 52 | 53 | # Seen previously, 1 is true and 0 is false 54 | self.action_space = gym.spaces.Discrete(2) 55 | 56 | def get_reward_range(self): 57 | return [0, 1] 58 | 59 | def get_done(self, obs): 60 | # +1 is for the weird mode between train/test 61 | if self.curr_t == self.max_items + self.max_queries + 1: 62 | return True 63 | 64 | return False 65 | 66 | def step(self, action): 67 | is_recall = -1 68 | item = None 69 | reward = 0 70 | 71 | # Train mode 72 | if self.curr_t < self.max_items - 1: 73 | is_recall = 0 74 | item = np.random.normal(size=self.dim) 75 | self.prev_queries.append(item) 76 | # First round of recall, we don't expect a valid action/reward here 77 | elif self.curr_t == self.max_items - 1: 78 | # Item is either in previous item or never seen 79 | is_recall = 1 80 | item = random.choice( 81 | [random.choice(self.prev_queries), np.random.normal(size=self.dim)] 82 | ) 83 | # Recall/test 
mode 84 | else: 85 | # For next 86 | is_recall = 1 87 | last_obs = self.prev_obs[-1] 88 | is_prev_item = any(last_obs["item"] is q for q in self.prev_queries) 89 | if is_prev_item and action == 1: 90 | reward = 1 91 | self.correct += 1 92 | self.true_positives += 1 93 | elif not is_prev_item and action == 0: 94 | reward = 1 95 | self.correct += 1 96 | self.true_negatives += 1 97 | 98 | # Item is either in previous item or never seen 99 | item = random.choice( 100 | [random.choice(self.prev_queries), np.random.normal(size=self.dim)] 101 | ) 102 | 103 | self.curr_t += 1 104 | obs = OrderedDict( 105 | [("is_recall", is_recall), ("item", item), ("timestep", self.curr_t)] 106 | ) 107 | self.prev_obs.append(obs) 108 | 109 | done = self.get_done(obs) 110 | info = { 111 | "num_correct": self.correct, 112 | "true_positives": self.true_positives, 113 | "true_negatives": self.true_negatives, 114 | } 115 | return obs, reward, done, info 116 | 117 | def reset(self): 118 | self.curr_t = 0 119 | self.correct = 0 120 | self.true_positives = 0 121 | self.true_negatives = 0 122 | self.prev_queries.clear() 123 | self.prev_obs.clear() 124 | 125 | is_recall = 0 126 | item = np.random.normal(size=self.dim) 127 | self.prev_queries.append(item) 128 | obs = OrderedDict( 129 | [("is_recall", is_recall), ("item", item), ("timestep", self.curr_t)] 130 | ) 131 | 132 | return obs 133 | 134 | def play(self): 135 | while True: 136 | action = 0 137 | done = False 138 | obs = self.reset() 139 | print(obs) 140 | while not done: 141 | obs, reward, done, info = self.step(action) 142 | print(obs) 143 | if obs["is_recall"]: 144 | print("Reward is:", reward) 145 | action = int(input("Enter action (0,1): ")) 146 | print("Game over, restarting") 147 | 148 | 149 | if __name__ == "__main__": 150 | e = RecallEnv() 151 | e.play() 152 | -------------------------------------------------------------------------------- /src/rewards/basic.py: -------------------------------------------------------------------------------- 1 | class BasicReward: 2 | def __init__(self): 3 | pass 4 | 5 | def on_env_load(self, env): 6 | self.env = env 7 | 8 | def get_reward_range(self): 9 | return [0.0, 1.0] 10 | 11 | def get_reward(self, obs): 12 | r = 0.0 13 | if self.env.habitat_env.get_metrics()["success"]: 14 | r += 1.0 15 | return r 16 | 17 | def reset(self): 18 | pass 19 | -------------------------------------------------------------------------------- /src/rewards/collision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class CollisionReward: 5 | """Provides a reward for aligning the agent motion vector with the 6 | goal vector. 
To inhibit moving back and forth to game the system, 7 | the reward is provided only if the agent is closer to the target 8 | than it has been before""" 9 | 10 | def __init__(self): 11 | self.rr = [-0.005, 0.0] 12 | self.past_states = [] 13 | 14 | def on_env_load(self, env): 15 | self.env = env 16 | 17 | def get_reward_range(self): 18 | return self.rr 19 | 20 | def get_reward(self, obs, grid_size=0.5): 21 | if self.env.habitat_env._sim.previous_step_collided: 22 | return self.rr[0] 23 | return self.rr[1] 24 | 25 | def reset(self): 26 | pass 27 | -------------------------------------------------------------------------------- /src/rewards/explore.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ExplorationReward: 5 | """Provides a reward for aligning the agent motion vector with the 6 | goal vector. To inhibit moving back and forth to game the system, 7 | the reward is provided only if the agent is closer to the target 8 | than it has been before""" 9 | 10 | def __init__(self): 11 | self.rr = [0.0, 0.01] 12 | self.past_states = [] 13 | 14 | def on_env_load(self, env): 15 | self.env = env 16 | 17 | def get_reward_range(self): 18 | return self.rr 19 | 20 | def get_reward(self, obs, grid_size=0.2): 21 | curr_state = self.env.habitat_env._sim.get_agent_state() 22 | 23 | for s in self.past_states: 24 | if np.linalg.norm(s.position - curr_state.position) < grid_size: 25 | return self.rr[0] 26 | 27 | self.past_states.append(curr_state) 28 | 29 | return self.rr[1] 30 | 31 | def reset(self): 32 | curr_state = self.env.habitat_env._sim.get_agent_state() 33 | self.past_states.clear() 34 | # Add first state so we dont get free reward 35 | self.past_states.append(curr_state) 36 | -------------------------------------------------------------------------------- /src/rewards/path.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower 3 | 4 | 5 | class PathReward: 6 | """Provides a reward for aligning the agent motion vector with the 7 | goal vector. 
To inhibit moving back and forth to game the system, 8 | the reward is provided only if the agent is closer to the target 9 | than it has been before""" 10 | 11 | def __init__(self): 12 | self.rr = [0.0, 0.01] 13 | 14 | def on_env_load(self, env): 15 | self.env = env 16 | self.follower = ShortestPathFollower( 17 | self.env.habitat_env.sim, 18 | self.env.episodes[0].goals[0].radius, 19 | return_one_hot=False, 20 | ) 21 | 22 | def get_reward_range(self): 23 | return self.rr 24 | 25 | def get_reward(self, obs): 26 | curr_state = self.env.habitat_env._sim.get_agent_state() 27 | goal_state = self.env.habitat_env.current_episode.goals[0] 28 | dist2goal = np.linalg.norm(goal_state.position - curr_state.position) 29 | # Prevent jittering, only reward if making progress 30 | if dist2goal >= self.closest: 31 | return 0.0 32 | 33 | self.closest = dist2goal 34 | 35 | # Cosine distance between current move and goal vector 36 | move_vec = curr_state.position - self.last_state.position 37 | move_vec /= np.linalg.norm(move_vec) 38 | goal_vec = goal_state.position - self.last_state.position 39 | goal_vec /= np.linalg.norm(goal_vec) 40 | reward_scale = max(0.0, np.dot(move_vec, goal_vec)) 41 | reward = reward_scale * self.rr[1] 42 | 43 | self.last_state = curr_state 44 | return reward 45 | 46 | def reset(self): 47 | self.last_state = self.env.habitat_env._sim.get_agent_state() 48 | self.closest = np.linalg.norm( 49 | self.env.habitat_env.current_episode.goals[0].position 50 | - self.last_state.position 51 | ) 52 | -------------------------------------------------------------------------------- /src/semantic_colors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | COLORS64 = np.array( 5 | [ 6 | 0, 7 | 0, 8 | 0, 9 | 1, 10 | 0, 11 | 103, 12 | 213, 13 | 255, 14 | 0, 15 | 255, 16 | 0, 17 | 86, 18 | 158, 19 | 0, 20 | 142, 21 | 14, 22 | 76, 23 | 161, 24 | 255, 25 | 229, 26 | 2, 27 | 0, 28 | 95, 29 | 57, 30 | 0, 31 | 255, 32 | 0, 33 | 149, 34 | 0, 35 | 58, 36 | 255, 37 | 147, 38 | 126, 39 | 164, 40 | 36, 41 | 0, 42 | 0, 43 | 21, 44 | 68, 45 | 145, 46 | 208, 47 | 203, 48 | 98, 49 | 14, 50 | 0, 51 | 107, 52 | 104, 53 | 130, 54 | 0, 55 | 0, 56 | 255, 57 | 0, 58 | 125, 59 | 181, 60 | 106, 61 | 130, 62 | 108, 63 | 0, 64 | 174, 65 | 126, 66 | 194, 67 | 140, 68 | 159, 69 | 190, 70 | 153, 71 | 112, 72 | 0, 73 | 143, 74 | 156, 75 | 95, 76 | 173, 77 | 78, 78 | 255, 79 | 0, 80 | 0, 81 | 255, 82 | 0, 83 | 246, 84 | 255, 85 | 2, 86 | 157, 87 | 104, 88 | 61, 89 | 59, 90 | 255, 91 | 116, 92 | 163, 93 | 150, 94 | 138, 95 | 232, 96 | 152, 97 | 255, 98 | 82, 99 | 167, 100 | 87, 101 | 64, 102 | 1, 103 | 255, 104 | 254, 105 | 255, 106 | 238, 107 | 232, 108 | 254, 109 | 137, 110 | 0, 111 | 189, 112 | 198, 113 | 255, 114 | 1, 115 | 208, 116 | 255, 117 | 187, 118 | 136, 119 | 0, 120 | 117, 121 | 68, 122 | 177, 123 | 165, 124 | 255, 125 | 210, 126 | 255, 127 | 166, 128 | 254, 129 | 119, 130 | 77, 131 | 0, 132 | 122, 133 | 71, 134 | 130, 135 | 38, 136 | 52, 137 | 0, 138 | 0, 139 | 71, 140 | 84, 141 | 67, 142 | 0, 143 | 44, 144 | 181, 145 | 0, 146 | 255, 147 | 255, 148 | 177, 149 | 103, 150 | 255, 151 | 219, 152 | 102, 153 | 144, 154 | 251, 155 | 146, 156 | 126, 157 | 45, 158 | 210, 159 | 189, 160 | 211, 161 | 147, 162 | 229, 163 | 111, 164 | 254, 165 | 222, 166 | 255, 167 | 116, 168 | 0, 169 | 255, 170 | 120, 171 | 0, 172 | 155, 173 | 255, 174 | 0, 175 | 100, 176 | 1, 177 | 0, 178 | 118, 179 | 255, 180 | 133, 181 | 169, 182 | 0, 183 | 0, 184 | 185, 185 | 23, 186 | 120, 
187 | 130, 188 | 49, 189 | 0, 190 | 255, 191 | 198, 192 | 255, 193 | 110, 194 | 65, 195 | 232, 196 | 94, 197 | 190, 198 | ] 199 | ).reshape(64, 3) 200 | -------------------------------------------------------------------------------- /src/server/render.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | import io 5 | import multiprocessing 6 | from flask import Flask, render_template, Response 7 | from flask_socketio import SocketIO, emit 8 | from PIL import Image 9 | from pathlib import Path 10 | from typing import List, Generator, Union, Dict, Any 11 | import numpy as np 12 | import logging 13 | 14 | 15 | # The render server exists to serve images written to /dev/shm 16 | # by workers over a web interface for debugging purposes 17 | 18 | 19 | RENDER_ROOT = "/dev/shm/render/" 20 | CLIENT_LOCK = Path(f"{RENDER_ROOT}/client_conn.lock") 21 | conn_clients = 0 22 | ACTION_QUEUE: Union[multiprocessing.Queue, None] 23 | RESPONSE_QUEUE: Union[multiprocessing.Queue, None] 24 | 25 | app = Flask(__name__) 26 | socketio = SocketIO(app) 27 | 28 | 29 | @socketio.on("action_input", namespace="/") 30 | def action_input(msg) -> None: 31 | global ACTION_QUEUE, RESPONSE_QUEUE 32 | 33 | char = chr(msg["data"]) 34 | if not ACTION_QUEUE: 35 | print("Action queue does not exist, interactive control does not work") 36 | ACTION_QUEUE.put(char) # type: ignore 37 | 38 | env_data: Dict[str, Any] = RESPONSE_QUEUE.get() 39 | emit("env_response", env_data) 40 | 41 | 42 | # SocketIO functions use a lock file for a web client connection 43 | # This ensures we are not wasting cycles producing images if there 44 | # are no viewers 45 | @socketio.on("connect", namespace="/") 46 | def connect() -> None: 47 | global conn_clients 48 | conn_clients += 1 49 | print(f"Client connected, clients: {conn_clients}") 50 | if conn_clients >= 0: 51 | print("Enabling visualization") 52 | CLIENT_LOCK.touch() 53 | 54 | 55 | @socketio.on("disconnect", namespace="/") 56 | def disconnect() -> None: 57 | global conn_clients 58 | conn_clients -= 1 59 | print(f"Client disconnected, clients: {conn_clients}") 60 | if conn_clients <= 0: 61 | print("Disabling visualization") 62 | CLIENT_LOCK.unlink(missing_ok=True) 63 | 64 | 65 | # Default load page 66 | @app.route("/") 67 | def index(): 68 | return render_template("index.html") 69 | 70 | 71 | def concat_v_from_paths(img_paths: List[str]) -> np.ndarray: 72 | imgs = [Image.open(p) for p in img_paths] 73 | dst = Image.new("RGB", (imgs[0].width, imgs[0].height * len(imgs))) 74 | for i in range(len(imgs)): 75 | dst.paste(imgs[i], (0, imgs[0].height * i)) 76 | return dst 77 | 78 | 79 | def concat_v_from_imgs(imgs: List[np.ndarray]) -> np.ndarray: 80 | dst = Image.new("RGB", (imgs[0].width, imgs[0].height * len(imgs))) 81 | for i in range(len(imgs)): 82 | dst.paste(imgs[i], (0, imgs[0].height * i)) 83 | return dst 84 | 85 | 86 | def concat_h_from_paths(img_paths: List[str]) -> np.ndarray: 87 | imgs = [Image.open(p) for p in img_paths] 88 | dst = Image.new( 89 | "RGB", (sum(img.width for img in imgs), max(img.height for img in imgs)) 90 | ) 91 | for i in range(len(imgs)): 92 | if i == 0: 93 | hpos = 0 94 | else: 95 | hpos = sum(img.width for img in imgs[:i]) 96 | dst.paste(imgs[i], (hpos, 0)) 97 | return dst 98 | 99 | 100 | last_exc = time.time() 101 | 102 | 103 | def gen() -> Generator[bytes, None, None]: 104 | global last_exc 105 | while True: 106 | try: 107 | if os.path.isdir(RENDER_ROOT): 108 | render_dirs = sorted( 109 | 
[f.path for f in os.scandir(RENDER_ROOT) if f.is_dir()] 110 | ) 111 | # TODO: Select any proc, not just the first 112 | render_dirs = [render_dirs[0]] 113 | img_paths = [sorted(glob.glob(f"{d}/*.jpg")) for d in render_dirs] 114 | if any(img_paths): 115 | # Concatenate images in the same dir (rgb, graph, etc) horizontally 116 | # and concatenate different dirs (worker1, worker2, etc) vertically 117 | h_imgs = [concat_h_from_paths(d) for d in img_paths] 118 | final_img = concat_v_from_imgs(h_imgs) 119 | buf = io.BytesIO() 120 | final_img.save(buf, format="jpeg") 121 | yield ( 122 | b"--frame\r\n" 123 | b"Content-Type: image/jpeg\r\n\r\n" + buf.getvalue() + b"\r\n" 124 | ) 125 | except Exception as e: 126 | # Do not spam console and pin CPU 127 | if time.time() - last_exc > 1: 128 | print("Render server failed to serve image") 129 | print(e) 130 | last_exc = time.time() 131 | 132 | 133 | @app.route("/video_feed") 134 | def video_feed(): 135 | return Response(gen(), mimetype="multipart/x-mixed-replace; boundary=frame") 136 | 137 | 138 | def main(action_queue=None, response_queue=None): 139 | logging.getLogger("socketio").setLevel(logging.ERROR) 140 | logging.getLogger("engineio").setLevel(logging.ERROR) 141 | logging.getLogger("werkzeug").setLevel(logging.ERROR) 142 | global ACTION_QUEUE, RESPONSE_QUEUE 143 | ACTION_QUEUE = action_queue 144 | RESPONSE_QUEUE = response_queue 145 | socketio.run(app, host="0.0.0.0", debug=True, use_reloader=False) 146 | 147 | 148 | if __name__ == "__main__": 149 | main() 150 | -------------------------------------------------------------------------------- /src/server/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 25 | Video Streaming Demonstration 26 | 27 | 28 |

Video Streaming Demonstration

29 | 30 |
31 | Move: W,A,S,D
32 | Look: E,Q
33 | Stop: Spacebar
34 | 35 | 36 |
37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/sizes.txt: -------------------------------------------------------------------------------- 1 | { 2 | "x": [ 3 | 2, 4 | 4, 5 | 6, 6 | 8, 7 | 10, 8 | 12, 9 | 14, 10 | 16, 11 | 18, 12 | 20, 13 | 22, 14 | 24, 15 | 26, 16 | 28, 17 | 30, 18 | 32 19 | ], 20 | "MLP": [ 21 | 26, 22 | 66, 23 | 122, 24 | 194, 25 | 282, 26 | 386, 27 | 506, 28 | 642, 29 | 794, 30 | 962, 31 | 1146, 32 | 1346, 33 | 1562, 34 | 1794, 35 | 2042, 36 | 2306 37 | ], 38 | "LSTM": [ 39 | 72, 40 | 188, 41 | 352, 42 | 564, 43 | 824, 44 | 1132, 45 | 1488, 46 | 1892, 47 | 2344, 48 | 2844, 49 | 3392, 50 | 3988, 51 | 4632, 52 | 5324, 53 | 6064, 54 | 6852 55 | ], 56 | "GTrXL": [ 57 | 128, 58 | 420, 59 | 880, 60 | 1508, 61 | 2304, 62 | 3268, 63 | 4400, 64 | 5700, 65 | 7168, 66 | 8804, 67 | 10608, 68 | 12580, 69 | 14720, 70 | 17028, 71 | 19504, 72 | 22148 73 | ], 74 | "DNC": [ 75 | 293, 76 | 879, 77 | 1785, 78 | 3011, 79 | 4557, 80 | 6423, 81 | 8609, 82 | 11115, 83 | 13941, 84 | 17087, 85 | 20553, 86 | 24339, 87 | 28445, 88 | 32871, 89 | 37617, 90 | 42683 91 | ], 92 | "GCM": [ 93 | 30, 94 | 90, 95 | 182, 96 | 306, 97 | 462, 98 | 650, 99 | 870, 100 | 1122, 101 | 1406, 102 | 1722, 103 | 2070, 104 | 2450, 105 | 2862, 106 | 3306, 107 | 3782, 108 | 4290 109 | ] 110 | } 111 | -------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | 4 | def load_class(cfg, key): 5 | try: 6 | module = importlib.import_module(cfg[key]["module"]) 7 | except KeyError: 8 | print(f'Did not find key [{key}]["module"] in cfg, ensure it is set') 9 | raise 10 | except Exception: 11 | print(f"Failed to load module {cfg[key]}, ensure" " module is set correctly.") 12 | raise 13 | 14 | try: 15 | cls = getattr(module, cfg[key]["class"]) 16 | except KeyError: 17 | print(f'Did not find key [{key}]["class"] in cfg, ensure it is set') 18 | raise 19 | except Exception: 20 | print(f"Failed to load class {cfg[key]}, ensure class " "is set correctly.") 21 | raise 22 | 23 | return cls 24 | -------------------------------------------------------------------------------- /src/val_to_video.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pickle 3 | import os 4 | from typing import Dict, Any 5 | import numpy as np 6 | import cv2 7 | import networkx as nx 8 | import matplotlib.pyplot as plt 9 | import io 10 | import torch 11 | 12 | np.set_printoptions(formatter={"float": lambda x: "{0:0.2f}".format(x)}) 13 | 14 | 15 | def generate_ego_graph(gps, obs_imgs, edges, frame): 16 | cur_gps = gps[frame] 17 | neigh_frames = edges[frame].nonzero()[0] 18 | neigh_gps = gps[neigh_frames] 19 | # Make everything ego-relative 20 | neigh_gps -= cur_gps 21 | cur_gps -= cur_gps 22 | 23 | # Img is [B, 1, H, W] 24 | # we want it [B, H, W, 3] for opencv 25 | all_imgs = obs_imgs[[frame, *neigh_frames.tolist()]] 26 | all_imgs = np.tile(np.swapaxes(all_imgs, 1, -1), [1, 1, 1, 3]) 27 | 28 | G = nx.Graph() 29 | 30 | all_nodes = np.concatenate( 31 | (cur_gps.reshape(1, 2), neigh_gps.reshape(-1, 2)), axis=0 32 | ) 33 | G.add_nodes_from( 34 | [ 35 | ( 36 | i, 37 | { 38 | "pos": all_nodes[i], 39 | "label_pos": all_nodes[i] + np.array([0, -0.08]), 40 | "img": all_imgs[i], 41 | }, 42 | ) 43 | for i in range(all_nodes.shape[0]) 44 | ] 45 | ) 46 | G.add_edges_from([(0, i) for i in range(1, 
all_nodes.shape[0])]) 47 | return G 48 | 49 | 50 | def render_ego_graph(G): 51 | fig = plt.figure() 52 | drawpos = nx.get_node_attributes(G, "pos") 53 | labelpos = nx.get_node_attributes(G, "label_pos") 54 | imgs = nx.get_node_attributes(G, "img") 55 | nx.draw(G, drawpos) 56 | nx.draw_networkx_labels(G, labels=drawpos, pos=labelpos) 57 | trans = plt.gca().transData.transform 58 | trans2 = fig.transFigure.inverted().transform 59 | 60 | img_size = 0.1 61 | p2 = img_size / 2.0 62 | offset = 0.08 63 | plt.xlim(-1.5, 1.5) 64 | plt.ylim(-1.5, 1.5) 65 | for i in imgs: 66 | xx, yy = trans(drawpos[i]) # figure coordinates 67 | xa, ya = trans2((xx, yy)) # axes coordinates 68 | a = plt.axes([xa - p2, ya - p2 + offset, img_size, img_size]) 69 | # a.set_aspect('equal') 70 | a.imshow(imgs[i]) 71 | a.axis("off") 72 | # import pdb; pdb.set_trace() 73 | # plt.figimage(imgs[i], drawpos[i][0], plt.ylim()[1] - drawpos[i][1]) 74 | plt.draw() 75 | return fig 76 | 77 | 78 | def load_vae(path="/root/vnav/depth_vae.pt"): 79 | return torch.load(path).to("cpu").eval() 80 | 81 | 82 | def latent_to_img(net, latents): 83 | with torch.no_grad(): 84 | # Format: [B, C, H, W] 85 | out = net.decode(torch.from_numpy(latents)).numpy() 86 | return (out * 255).astype(np.uint8) 87 | 88 | 89 | def get_ego_edges(gps, edges, frame): 90 | cur_gps = gps[frame] 91 | neigh_frames = edges[frame].nonzero()[0] 92 | neigh_gps = gps[neigh_frames] 93 | # Make everything ego-relative 94 | neigh_gps -= cur_gps 95 | cur_gps -= cur_gps 96 | 97 | 98 | def process(data: Dict[str, np.ndarray], vae: torch.nn.Module, outdir: str): 99 | depths = latent_to_img(vae, data["latent"]) 100 | for frame in range(data["action_prob"].shape[0]): 101 | G = generate_ego_graph(data["gps"], depths, data["forward_edges"], frame) 102 | fig = render_ego_graph(G) 103 | fig.savefig(f"{outdir}/{frame}.jpg") 104 | plt.close(fig) 105 | # ep_imgs += [render_ego_graph(G)] 106 | 107 | 108 | def main(): 109 | wdir = sys.argv[1] + "/validation" 110 | files = [f"{wdir}/{f}" for f in os.listdir(wdir) if os.path.isfile(f"{wdir}/{f}")] 111 | vae = load_vae() 112 | for f in files: 113 | with open(f, "rb") as fp: 114 | data = pickle.load(fp) 115 | outdir = f"/tmp/gviz/{os.path.basename(f)}" 116 | os.makedirs(outdir, exist_ok=True) 117 | process(data, vae, outdir) 118 | 119 | 120 | if __name__ == "__main__": 121 | main() 122 | -------------------------------------------------------------------------------- /vae.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smorad/graph-conv-memory-paper/4401b87703a631e85347efa665d822e31a41b1c6/vae.pt --------------------------------------------------------------------------------
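For reference, generate_ego_graph above recenters the neighbours' GPS coordinates on the current frame and attaches the decoded depth images as node attributes, with node 0 acting as the ego node. A minimal self-contained sketch of the same construction on synthetic arrays; the sizes, random data, and edge pattern below are placeholders rather than values produced by the validation pipeline:

import numpy as np
import networkx as nx

T, H, W = 5, 8, 8                  # toy number of frames and image size
gps = np.random.randn(T, 2).astype(np.float32)
imgs = np.random.rand(T, 1, H, W)  # stand-in for latent_to_img(vae, data["latent"])
edges = np.zeros((T, T), dtype=np.int64)
edges[4, [1, 3]] = 1               # frame 4 keeps edges back to frames 1 and 3

frame = 4
neigh = edges[frame].nonzero()[0]
nodes = [frame, *neigh.tolist()]
# Fancy indexing copies, so recentering here leaves the shared gps array untouched.
rel = gps[nodes] - gps[frame]

G = nx.Graph()
G.add_nodes_from(
    (i, {"pos": rel[i], "img": imgs[nodes[i], 0]}) for i in range(len(nodes))
)
G.add_edges_from((0, i) for i in range(1, len(nodes)))

From here render_ego_graph lays the node images out at their ego-relative positions, and the per-frame figures written by process can be stitched into a video with any external encoder.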