├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── agents ├── sem_exp.py └── utils │ ├── semantic_prediction.py │ └── visualization.py ├── algo ├── __init__.py └── ppo.py ├── arguments.py ├── configs ├── Base-RCNN-FPN.yaml └── COCO-InstanceSegmentation │ └── mask_rcnn_R_50_FPN_3x.yaml ├── constants.py ├── docs ├── DOCKER_INSTRUCTIONS.md ├── INSTRUCTIONS.md ├── example.gif ├── legend.png └── overview.jpg ├── envs ├── __init__.py ├── habitat │ ├── __init__.py │ ├── configs │ │ └── tasks │ │ │ └── objectnav_gibson.yaml │ ├── objectgoal_env.py │ └── utils │ │ └── vector_env.py └── utils │ ├── depth_utils.py │ ├── fmm_planner.py │ ├── map_builder.py │ ├── pose.py │ └── rotation_utils.py ├── main.py ├── model.py ├── requirements.txt ├── test.py └── utils ├── distributions.py ├── model.py ├── optimization.py └── storage.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # vim 132 | *.swo 133 | *.swp 134 | 135 | .idea/ 136 | .vscode/ 137 | *.DS_Store 138 | 139 | # Data and log folders 140 | saved/ 141 | tmp/ 142 | pretrained_models/ 143 | data/ 144 | data 145 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Base image 2 | FROM nvidia/cudagl:10.1-devel-ubuntu16.04 3 | 4 | # Setup basic packages 5 | RUN apt-get update && apt-get install -y --no-install-recommends \ 6 | build-essential \ 7 | git \ 8 | curl \ 9 | vim \ 10 | ca-certificates \ 11 | libjpeg-dev \ 12 | libpng-dev \ 13 | libglfw3-dev \ 14 | libglm-dev \ 15 | libx11-dev \ 16 | libomp-dev \ 17 | libegl1-mesa-dev \ 18 | pkg-config \ 19 | wget \ 20 | zip \ 21 | htop \ 22 | tmux \ 23 | unzip &&\ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | # Install conda 27 | RUN wget -O $HOME/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh &&\ 28 | chmod +x ~/miniconda.sh &&\ 29 | ~/miniconda.sh -b -p /custom/conda &&\ 30 | rm ~/miniconda.sh &&\ 31 | /custom/conda/bin/conda install numpy pyyaml scipy ipython mkl mkl-include &&\ 32 | /custom/conda/bin/conda clean -ya 33 | ENV PATH /custom/conda/bin:$PATH 34 | 35 | # Install cmake 36 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.14.0/cmake-3.14.0-Linux-x86_64.sh 37 | RUN mkdir /opt/cmake 38 | RUN sh /cmake-3.14.0-Linux-x86_64.sh --prefix=/opt/cmake --skip-license 39 | RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake 40 | RUN cmake --version 41 | 42 | # Setup habitat-sim 43 | RUN git clone https://github.com/facebookresearch/habitat-sim.git 44 | RUN /bin/bash -c "cd habitat-sim; git checkout tags/v0.1.5; pip install -r requirements.txt; python setup.py install --headless --with-cuda" 45 | 46 | # Install challenge specific habitat-api 47 | RUN git clone https://github.com/facebookresearch/habitat-api.git 48 | RUN /bin/bash -c "cd habitat-api; git checkout tags/v0.1.5; pip install -e ." 
49 | RUN /bin/bash -c "cd habitat-api; wget http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip; unzip habitat-test-scenes.zip" 50 | 51 | # Silence habitat-sim logs 52 | ENV GLOG_minloglevel=2 53 | ENV MAGNUM_LOG="quiet" 54 | 55 | # Install project specific packages 56 | RUN /bin/bash -c "apt-get update; apt-get install -y libsm6 libxext6 libxrender-dev; pip install opencv-python" 57 | RUN /bin/bash -c "pip install --upgrade cython numpy" 58 | RUN /bin/bash -c "pip install matplotlib seaborn==0.9.0 scikit-fmm==2019.1.30 scikit-image==0.15.0 imageio==2.6.0 scikit-learn==0.22.2.post1 ifcfg" 59 | 60 | # Install pytorch and torch_scatter 61 | RUN conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.2 -c pytorch 62 | RUN /bin/bash -c "pip install torch_scatter" 63 | 64 | # Install detectron2 65 | RUN /bin/bash -c "python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html" 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Devendra Chaplot 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object Goal Navigation using Goal-Oriented Semantic Exploration 2 | This is a PyTorch implementation of the NeurIPS-20 paper: 3 | 4 | [Object Goal Navigation using Goal-Oriented Semantic Exploration](https://arxiv.org/pdf/2007.00643.pdf)
5 | Devendra Singh Chaplot, Dhiraj Gandhi, Abhinav Gupta, Ruslan Salakhutdinov
6 | Carnegie Mellon University, Facebook AI Research
7 |
8 | Winner of the [CVPR 2020 Habitat ObjectNav Challenge](https://aihabitat.org/challenge/2020/).
9 |
10 | Project Website: https://devendrachaplot.github.io/projects/semantic-exploration
11 |
12 | ![example](./docs/example.gif)
13 |
14 | ### Overview:
15 | The Goal-Oriented Semantic Exploration (SemExp) model consists of three modules: a Semantic Mapping Module, a Goal-Oriented Semantic Policy, and a deterministic Local Policy.
16 | As shown below, the Semantic Mapping Module builds a semantic map over time. The Goal-Oriented Semantic Policy selects a long-term goal based on the semantic
17 | map to reach the given object goal efficiently. A deterministic Local Policy based on analytical planners is used to take low-level navigation actions to reach the long-term goal.
18 |
19 | ![overview](./docs/overview.jpg)
20 |
21 | ### This repository contains:
22 | - Train and test episode datasets for the [Object Goal Navigation](https://arxiv.org/pdf/2007.00643.pdf) task for the Gibson dataset in the Habitat Simulator.
23 | - The code to train and evaluate the Semantic Exploration (SemExp) model on the Object Goal Navigation task.
24 | - A pretrained SemExp model.
25 |
26 | ## Installing Dependencies
27 | - We use earlier versions of [habitat-sim](https://github.com/facebookresearch/habitat-sim) and [habitat-lab](https://github.com/facebookresearch/habitat-lab) as specified below:
28 |
29 | Installing habitat-sim:
30 | ```
31 | git clone https://github.com/facebookresearch/habitat-sim.git
32 | cd habitat-sim; git checkout tags/v0.1.5;
33 | pip install -r requirements.txt;
34 | python setup.py install --headless
35 | python setup.py install # (for Mac OS)
36 | ```
37 |
38 | Installing habitat-lab:
39 | ```
40 | git clone https://github.com/facebookresearch/habitat-lab.git
41 | cd habitat-lab; git checkout tags/v0.1.5;
42 | pip install -e .
43 | ```
44 | Check the habitat installation by running `python examples/benchmark.py` in the habitat-lab folder.
45 |
46 | - Install [pytorch](https://pytorch.org/) according to your system configuration. The code is tested on pytorch v1.6.0 and cudatoolkit v10.2. If you are using conda:
47 | ```
48 | conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch #(Linux with GPU)
49 | conda install pytorch==1.6.0 torchvision==0.7.0 -c pytorch #(Mac OS)
50 | ```
51 |
52 | - Install [detectron2](https://github.com/facebookresearch/detectron2/) according to your system configuration. If you are using conda:
53 | ```
54 | python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html #(Linux with GPU)
55 | CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' #(Mac OS)
56 | ```
57 |
58 | ### Docker and Singularity images:
59 | We provide experimental [docker](https://www.docker.com/) and [singularity](https://sylabs.io/) images with all the dependencies installed, see [Docker Instructions](./docs/DOCKER_INSTRUCTIONS.md).
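As an optional sanity check of the dependencies (this one-liner only imports the key packages; the expected output assumes the versions above):
```
python -c "import torch, detectron2, habitat; print(torch.__version__, torch.cuda.is_available())"
```
On Linux with a GPU this should print `1.6.0 True`.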
60 | 61 | 62 | ## Setup 63 | Clone the repository and install other requirements: 64 | ``` 65 | git clone https://github.com/devendrachaplot/Object-Goal-Navigation/ 66 | cd Object-Goal-Navigation/; 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | ### Downloading scene dataset 71 | - Download the Gibson dataset using the instructions here: https://github.com/facebookresearch/habitat-lab#scenes-datasets (download the 11GB file `gibson_habitat_trainval.zip`) 72 | - Move the Gibson scene dataset or create a symlink at `data/scene_datasets/gibson_semantic`. 73 | 74 | ### Downloading episode dataset 75 | - Download the episode dataset: 76 | ``` 77 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1tslnZAkH8m3V5nP8pbtBmaR2XEfr8Rau' -O objectnav_gibson_v1.1.zip 78 | ``` 79 | - Unzip the dataset into `data/datasets/objectnav/gibson/v1.1/` 80 | 81 | ### Setting up datasets 82 | The code requires the datasets in a `data` folder in the following format (same as habitat-lab): 83 | ``` 84 | Object-Goal-Navigation/ 85 | data/ 86 | scene_datasets/ 87 | gibson_semantic/ 88 | Adrian.glb 89 | Adrian.navmesh 90 | ... 91 | datasets/ 92 | objectnav/ 93 | gibson/ 94 | v1.1/ 95 | train/ 96 | val/ 97 | ``` 98 | 99 | 100 | ### Test setup 101 | To verify that the data is setup correctly, run: 102 | ``` 103 | python test.py --agent random -n1 --num_eval_episodes 1 --auto_gpu_config 0 104 | ``` 105 | 106 | ## Usage 107 | 108 | ### Training: 109 | For training the SemExp model on the Object Goal Navigation task: 110 | ``` 111 | python main.py 112 | ``` 113 | 114 | ### Downloading pre-trained models 115 | ``` 116 | mkdir pretrained_models; 117 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=171ZA7XNu5vi3XLpuKs8DuGGZrYyuSjL0' -O pretrained_models/sem_exp.pth 118 | ``` 119 | 120 | ### For evaluation: 121 | For evaluating the pre-trained model: 122 | ``` 123 | python main.py --split val --eval 1 --load pretrained_models/sem_exp.pth 124 | ``` 125 | 126 | For visualizing the agent observations and predicted semantic map, add `-v 1` as an argument to the above command. 127 | 128 | The pre-trained model should get 0.657 Success, 0.339 SPL and 1.474 DTG. 129 | 130 | For more detailed instructions, see [INSTRUCTIONS](./docs/INSTRUCTIONS.md). 131 | 132 | 133 | ## Cite as 134 | >Chaplot, D.S., Gandhi, D., Gupta, A. and Salakhutdinov, R., 2020. Object Goal Navigation using Goal-Oriented Semantic Exploration. In Neural Information Processing Systems (NeurIPS-20). ([PDF](https://arxiv.org/pdf/2007.00643.pdf)) 135 | 136 | ### Bibtex: 137 | ``` 138 | @inproceedings{chaplot2020object, 139 | title={Object Goal Navigation using Goal-Oriented Semantic Exploration}, 140 | author={Chaplot, Devendra Singh and Gandhi, Dhiraj and 141 | Gupta, Abhinav and Salakhutdinov, Ruslan}, 142 | booktitle={In Neural Information Processing Systems (NeurIPS)}, 143 | year={2020} 144 | } 145 | ``` 146 | 147 | ## Related Projects 148 | - This project builds on the [Active Neural SLAM](https://devendrachaplot.github.io/projects/Neural-SLAM) paper. The code and pretrained models for the Active Neural SLAM system are available at: 149 | https://github.com/devendrachaplot/Neural-SLAM. 150 | - The Semantic Mapping module is similar to the one used in [Semantic Curiosity](https://devendrachaplot.github.io/projects/SemanticCuriosity). 151 | 152 | ## Acknowledgements 153 | This repository uses [Habitat Lab](https://github.com/facebookresearch/habitat-lab) implementation for running the RL environment. 
154 | The implementation of PPO is borrowed from [ikostrikov/pytorch-a2c-ppo-acktr-gail](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/).
155 | The Mask-RCNN implementation is based on the [detectron2](https://github.com/facebookresearch/detectron2/) repository. We would also like to thank Shubham Tulsiani and Saurabh Gupta for their help in implementing some parts of the code.
156 |
-------------------------------------------------------------------------------- /agents/sem_exp.py: --------------------------------------------------------------------------------
1 | import math
2 | import os
3 | import cv2
4 | import numpy as np
5 | import skimage.morphology
6 | from PIL import Image
7 | from torchvision import transforms
8 |
9 | from envs.utils.fmm_planner import FMMPlanner
10 | from envs.habitat.objectgoal_env import ObjectGoal_Env
11 | from agents.utils.semantic_prediction import SemanticPredMaskRCNN
12 | from constants import color_palette
13 | import envs.utils.pose as pu
14 | import agents.utils.visualization as vu
15 |
16 |
17 | class Sem_Exp_Env_Agent(ObjectGoal_Env):
18 |     """The Sem_Exp environment agent class. A separate Sem_Exp_Env_Agent class
19 |     object is used for each environment thread.
20 |
21 |     """
22 |
23 |     def __init__(self, args, rank, config_env, dataset):
24 |
25 |         self.args = args
26 |         super().__init__(args, rank, config_env, dataset)
27 |
28 |         # initialize transform for RGB observations
29 |         self.res = transforms.Compose(
30 |             [transforms.ToPILImage(),
31 |              transforms.Resize((args.frame_height, args.frame_width),
32 |                                interpolation=Image.NEAREST)])
33 |
34 |         # initialize semantic segmentation prediction model
35 |         if args.sem_gpu_id == -1:
36 |             args.sem_gpu_id = config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID
37 |
38 |         self.sem_pred = SemanticPredMaskRCNN(args)
39 |
40 |         # initializations for planning:
41 |         self.selem = skimage.morphology.disk(3)
42 |
43 |         self.obs = None
44 |         self.obs_shape = None
45 |         self.collision_map = None
46 |         self.visited = None
47 |         self.visited_vis = None
48 |         self.col_width = None
49 |         self.curr_loc = None
50 |         self.last_loc = None
51 |         self.last_action = None
52 |         self.count_forward_actions = None
53 |
54 |         if args.visualize or args.print_images:
55 |             self.legend = cv2.imread('docs/legend.png')
56 |             self.vis_image = None
57 |             self.rgb_vis = None
58 |
59 |     def reset(self):
60 |         args = self.args
61 |
62 |         obs, info = super().reset()
63 |         obs = self._preprocess_obs(obs)
64 |
65 |         self.obs_shape = obs.shape
66 |
67 |         # Episode initializations
68 |         map_shape = (args.map_size_cm // args.map_resolution,
69 |                      args.map_size_cm // args.map_resolution)
70 |         self.collision_map = np.zeros(map_shape)
71 |         self.visited = np.zeros(map_shape)
72 |         self.visited_vis = np.zeros(map_shape)
73 |         self.col_width = 1
74 |         self.count_forward_actions = 0
75 |         self.curr_loc = [args.map_size_cm / 100.0 / 2.0,
76 |                          args.map_size_cm / 100.0 / 2.0, 0.]
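        # (curr_loc above places the agent at the center of the map,
        # i.e. map_size_cm / 100.0 / 2.0 meters in x and y, heading 0 degrees)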
77 | self.last_action = None 78 | 79 | if args.visualize or args.print_images: 80 | self.vis_image = vu.init_vis_image(self.goal_name, self.legend) 81 | 82 | return obs, info 83 | 84 | def plan_act_and_preprocess(self, planner_inputs): 85 | """Function responsible for planning, taking the action and 86 | preprocessing observations 87 | 88 | Args: 89 | planner_inputs (dict): 90 | dict with following keys: 91 | 'map_pred' (ndarray): (M, M) map prediction 92 | 'goal' (ndarray): (M, M) mat denoting goal locations 93 | 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o) 94 | and planning window (gx1, gx2, gy1, gy2) 95 | 'found_goal' (bool): whether the goal object is found 96 | 97 | Returns: 98 | obs (ndarray): preprocessed observations ((4+C) x H x W) 99 | reward (float): amount of reward returned after previous action 100 | done (bool): whether the episode has ended 101 | info (dict): contains timestep, pose, goal category and 102 | evaluation metric info 103 | """ 104 | 105 | # plan 106 | if planner_inputs["wait"]: 107 | self.last_action = None 108 | self.info["sensor_pose"] = [0., 0., 0.] 109 | return np.zeros(self.obs.shape), 0., False, self.info 110 | 111 | # Reset reward if new long-term goal 112 | if planner_inputs["new_goal"]: 113 | self.info["g_reward"] = 0 114 | 115 | action = self._plan(planner_inputs) 116 | 117 | if self.args.visualize or self.args.print_images: 118 | self._visualize(planner_inputs) 119 | 120 | if action >= 0: 121 | 122 | # act 123 | action = {'action': action} 124 | obs, rew, done, info = super().step(action) 125 | 126 | # preprocess obs 127 | obs = self._preprocess_obs(obs) 128 | self.last_action = action['action'] 129 | self.obs = obs 130 | self.info = info 131 | 132 | info['g_reward'] += rew 133 | 134 | return obs, rew, done, info 135 | 136 | else: 137 | self.last_action = None 138 | self.info["sensor_pose"] = [0., 0., 0.] 
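            # No action to execute: return a blank observation and a zero
            # sensor pose delta, leaving the episode state untouched
            # (mirrors the 'wait' branch at the top of this method).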
139 | return np.zeros(self.obs_shape), 0., False, self.info 140 | 141 | def _plan(self, planner_inputs): 142 | """Function responsible for planning 143 | 144 | Args: 145 | planner_inputs (dict): 146 | dict with following keys: 147 | 'map_pred' (ndarray): (M, M) map prediction 148 | 'goal' (ndarray): (M, M) goal locations 149 | 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o) 150 | and planning window (gx1, gx2, gy1, gy2) 151 | 'found_goal' (bool): whether the goal object is found 152 | 153 | Returns: 154 | action (int): action id 155 | """ 156 | args = self.args 157 | 158 | self.last_loc = self.curr_loc 159 | 160 | # Get Map prediction 161 | map_pred = np.rint(planner_inputs['map_pred']) 162 | goal = planner_inputs['goal'] 163 | 164 | # Get pose prediction and global policy planning window 165 | start_x, start_y, start_o, gx1, gx2, gy1, gy2 = \ 166 | planner_inputs['pose_pred'] 167 | gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2) 168 | planning_window = [gx1, gx2, gy1, gy2] 169 | 170 | # Get curr loc 171 | self.curr_loc = [start_x, start_y, start_o] 172 | r, c = start_y, start_x 173 | start = [int(r * 100.0 / args.map_resolution - gx1), 174 | int(c * 100.0 / args.map_resolution - gy1)] 175 | start = pu.threshold_poses(start, map_pred.shape) 176 | 177 | self.visited[gx1:gx2, gy1:gy2][start[0] - 0:start[0] + 1, 178 | start[1] - 0:start[1] + 1] = 1 179 | 180 | if args.visualize or args.print_images: 181 | # Get last loc 182 | last_start_x, last_start_y = self.last_loc[0], self.last_loc[1] 183 | r, c = last_start_y, last_start_x 184 | last_start = [int(r * 100.0 / args.map_resolution - gx1), 185 | int(c * 100.0 / args.map_resolution - gy1)] 186 | last_start = pu.threshold_poses(last_start, map_pred.shape) 187 | self.visited_vis[gx1:gx2, gy1:gy2] = \ 188 | vu.draw_line(last_start, start, 189 | self.visited_vis[gx1:gx2, gy1:gy2]) 190 | 191 | # Collision check 192 | if self.last_action == 1: 193 | x1, y1, t1 = self.last_loc 194 | x2, y2, _ = self.curr_loc 195 | buf = 4 196 | length = 2 197 | 198 | if abs(x1 - x2) < 0.05 and abs(y1 - y2) < 0.05: 199 | self.col_width += 2 200 | if self.col_width == 7: 201 | length = 4 202 | buf = 3 203 | self.col_width = min(self.col_width, 5) 204 | else: 205 | self.col_width = 1 206 | 207 | dist = pu.get_l2_distance(x1, x2, y1, y2) 208 | if dist < args.collision_threshold: # Collision 209 | width = self.col_width 210 | for i in range(length): 211 | for j in range(width): 212 | wx = x1 + 0.05 * \ 213 | ((i + buf) * np.cos(np.deg2rad(t1)) 214 | + (j - width // 2) * np.sin(np.deg2rad(t1))) 215 | wy = y1 + 0.05 * \ 216 | ((i + buf) * np.sin(np.deg2rad(t1)) 217 | - (j - width // 2) * np.cos(np.deg2rad(t1))) 218 | r, c = wy, wx 219 | r, c = int(r * 100 / args.map_resolution), \ 220 | int(c * 100 / args.map_resolution) 221 | [r, c] = pu.threshold_poses([r, c], 222 | self.collision_map.shape) 223 | self.collision_map[r, c] = 1 224 | 225 | stg, stop = self._get_stg(map_pred, start, np.copy(goal), 226 | planning_window) 227 | 228 | # Deterministic Local Policy 229 | if stop and planner_inputs['found_goal'] == 1: 230 | action = 0 # Stop 231 | else: 232 | (stg_x, stg_y) = stg 233 | angle_st_goal = math.degrees(math.atan2(stg_x - start[0], 234 | stg_y - start[1])) 235 | angle_agent = (start_o) % 360.0 236 | if angle_agent > 180: 237 | angle_agent -= 360 238 | 239 | relative_angle = (angle_agent - angle_st_goal) % 360.0 240 | if relative_angle > 180: 241 | relative_angle -= 360 242 | 243 | if relative_angle > self.args.turn_angle / 2.: 244 | action = 3 # 
Right 245 | elif relative_angle < -self.args.turn_angle / 2.: 246 | action = 2 # Left 247 | else: 248 | action = 1 # Forward 249 | 250 | return action 251 | 252 | def _get_stg(self, grid, start, goal, planning_window): 253 | """Get short-term goal""" 254 | 255 | [gx1, gx2, gy1, gy2] = planning_window 256 | 257 | x1, y1, = 0, 0 258 | x2, y2 = grid.shape 259 | 260 | def add_boundary(mat, value=1): 261 | h, w = mat.shape 262 | new_mat = np.zeros((h + 2, w + 2)) + value 263 | new_mat[1:h + 1, 1:w + 1] = mat 264 | return new_mat 265 | 266 | traversible = skimage.morphology.binary_dilation( 267 | grid[x1:x2, y1:y2], 268 | self.selem) != True 269 | traversible[self.collision_map[gx1:gx2, gy1:gy2] 270 | [x1:x2, y1:y2] == 1] = 0 271 | traversible[self.visited[gx1:gx2, gy1:gy2][x1:x2, y1:y2] == 1] = 1 272 | 273 | traversible[int(start[0] - x1) - 1:int(start[0] - x1) + 2, 274 | int(start[1] - y1) - 1:int(start[1] - y1) + 2] = 1 275 | 276 | traversible = add_boundary(traversible) 277 | goal = add_boundary(goal, value=0) 278 | 279 | planner = FMMPlanner(traversible) 280 | selem = skimage.morphology.disk(10) 281 | goal = skimage.morphology.binary_dilation( 282 | goal, selem) != True 283 | goal = 1 - goal * 1. 284 | planner.set_multi_goal(goal) 285 | 286 | state = [start[0] - x1 + 1, start[1] - y1 + 1] 287 | stg_x, stg_y, _, stop = planner.get_short_term_goal(state) 288 | 289 | stg_x, stg_y = stg_x + x1 - 1, stg_y + y1 - 1 290 | 291 | return (stg_x, stg_y), stop 292 | 293 | def _preprocess_obs(self, obs, use_seg=True): 294 | args = self.args 295 | obs = obs.transpose(1, 2, 0) 296 | rgb = obs[:, :, :3] 297 | depth = obs[:, :, 3:4] 298 | 299 | sem_seg_pred = self._get_sem_pred( 300 | rgb.astype(np.uint8), use_seg=use_seg) 301 | depth = self._preprocess_depth(depth, args.min_depth, args.max_depth) 302 | 303 | ds = args.env_frame_width // args.frame_width # Downscaling factor 304 | if ds != 1: 305 | rgb = np.asarray(self.res(rgb.astype(np.uint8))) 306 | depth = depth[ds // 2::ds, ds // 2::ds] 307 | sem_seg_pred = sem_seg_pred[ds // 2::ds, ds // 2::ds] 308 | 309 | depth = np.expand_dims(depth, axis=2) 310 | state = np.concatenate((rgb, depth, sem_seg_pred), 311 | axis=2).transpose(2, 0, 1) 312 | 313 | return state 314 | 315 | def _preprocess_depth(self, depth, min_d, max_d): 316 | depth = depth[:, :, 0] * 1 317 | 318 | for i in range(depth.shape[1]): 319 | depth[:, i][depth[:, i] == 0.] = depth[:, i].max() 320 | 321 | mask2 = depth > 0.99 322 | depth[mask2] = 0. 
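        # depth is still normalized to [0, 1] at this point: invalid zero
        # readings were filled with the per-column max above, near-saturated
        # readings (> 0.99) were just zeroed out, and the remaining zeros are
        # pushed far out of range below, before rescaling to centimeters.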
323 | 324 | mask1 = depth == 0 325 | depth[mask1] = 100.0 326 | depth = min_d * 100.0 + depth * max_d * 100.0 327 | return depth 328 | 329 | def _get_sem_pred(self, rgb, use_seg=True): 330 | if use_seg: 331 | semantic_pred, self.rgb_vis = self.sem_pred.get_prediction(rgb) 332 | semantic_pred = semantic_pred.astype(np.float32) 333 | else: 334 | semantic_pred = np.zeros((rgb.shape[0], rgb.shape[1], 16)) 335 | self.rgb_vis = rgb[:, :, ::-1] 336 | return semantic_pred 337 | 338 | def _visualize(self, inputs): 339 | args = self.args 340 | dump_dir = "{}/dump/{}/".format(args.dump_location, 341 | args.exp_name) 342 | ep_dir = '{}/episodes/thread_{}/eps_{}/'.format( 343 | dump_dir, self.rank, self.episode_no) 344 | if not os.path.exists(ep_dir): 345 | os.makedirs(ep_dir) 346 | 347 | map_pred = inputs['map_pred'] 348 | exp_pred = inputs['exp_pred'] 349 | start_x, start_y, start_o, gx1, gx2, gy1, gy2 = inputs['pose_pred'] 350 | 351 | goal = inputs['goal'] 352 | sem_map = inputs['sem_map_pred'] 353 | 354 | gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2) 355 | 356 | sem_map += 5 357 | 358 | no_cat_mask = sem_map == 20 359 | map_mask = np.rint(map_pred) == 1 360 | exp_mask = np.rint(exp_pred) == 1 361 | vis_mask = self.visited_vis[gx1:gx2, gy1:gy2] == 1 362 | 363 | sem_map[no_cat_mask] = 0 364 | m1 = np.logical_and(no_cat_mask, exp_mask) 365 | sem_map[m1] = 2 366 | 367 | m2 = np.logical_and(no_cat_mask, map_mask) 368 | sem_map[m2] = 1 369 | 370 | sem_map[vis_mask] = 3 371 | 372 | selem = skimage.morphology.disk(4) 373 | goal_mat = 1 - skimage.morphology.binary_dilation( 374 | goal, selem) != True 375 | 376 | goal_mask = goal_mat == 1 377 | sem_map[goal_mask] = 4 378 | 379 | color_pal = [int(x * 255.) for x in color_palette] 380 | sem_map_vis = Image.new("P", (sem_map.shape[1], 381 | sem_map.shape[0])) 382 | sem_map_vis.putpalette(color_pal) 383 | sem_map_vis.putdata(sem_map.flatten().astype(np.uint8)) 384 | sem_map_vis = sem_map_vis.convert("RGB") 385 | sem_map_vis = np.flipud(sem_map_vis) 386 | 387 | sem_map_vis = sem_map_vis[:, :, [2, 1, 0]] 388 | sem_map_vis = cv2.resize(sem_map_vis, (480, 480), 389 | interpolation=cv2.INTER_NEAREST) 390 | self.vis_image[50:530, 15:655] = self.rgb_vis 391 | self.vis_image[50:530, 670:1150] = sem_map_vis 392 | 393 | pos = ( 394 | (start_x * 100. / args.map_resolution - gy1) 395 | * 480 / map_pred.shape[0], 396 | (map_pred.shape[1] - start_y * 100. 
/ args.map_resolution + gx1) 397 | * 480 / map_pred.shape[1], 398 | np.deg2rad(-start_o) 399 | ) 400 | 401 | agent_arrow = vu.get_contour_points(pos, origin=(670, 50)) 402 | color = (int(color_palette[11] * 255), 403 | int(color_palette[10] * 255), 404 | int(color_palette[9] * 255)) 405 | cv2.drawContours(self.vis_image, [agent_arrow], 0, color, -1) 406 | 407 | if args.visualize: 408 | # Displaying the image 409 | cv2.imshow("Thread {}".format(self.rank), self.vis_image) 410 | cv2.waitKey(1) 411 | 412 | if args.print_images: 413 | fn = '{}/episodes/thread_{}/eps_{}/{}-{}-Vis-{}.png'.format( 414 | dump_dir, self.rank, self.episode_no, 415 | self.rank, self.episode_no, self.timestep) 416 | cv2.imwrite(fn, self.vis_image) 417 | -------------------------------------------------------------------------------- /agents/utils/semantic_prediction.py: -------------------------------------------------------------------------------- 1 | # The following code is largely borrowed from 2 | # https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py and 3 | # https://github.com/facebookresearch/detectron2/blob/master/demo/predictor.py 4 | 5 | import argparse 6 | import time 7 | 8 | import torch 9 | import numpy as np 10 | 11 | from detectron2.config import get_cfg 12 | from detectron2.utils.logger import setup_logger 13 | from detectron2.data.catalog import MetadataCatalog 14 | from detectron2.modeling import build_model 15 | from detectron2.checkpoint import DetectionCheckpointer 16 | from detectron2.utils.visualizer import ColorMode, Visualizer 17 | import detectron2.data.transforms as T 18 | 19 | from constants import coco_categories_mapping 20 | 21 | 22 | class SemanticPredMaskRCNN(): 23 | 24 | def __init__(self, args): 25 | self.segmentation_model = ImageSegmentation(args) 26 | self.args = args 27 | 28 | def get_prediction(self, img): 29 | args = self.args 30 | image_list = [] 31 | img = img[:, :, ::-1] 32 | image_list.append(img) 33 | seg_predictions, vis_output = self.segmentation_model.get_predictions( 34 | image_list, visualize=args.visualize == 2) 35 | 36 | if args.visualize == 2: 37 | img = vis_output.get_image() 38 | 39 | semantic_input = np.zeros((img.shape[0], img.shape[1], 15 + 1)) 40 | 41 | for j, class_idx in enumerate( 42 | seg_predictions[0]['instances'].pred_classes.cpu().numpy()): 43 | if class_idx in list(coco_categories_mapping.keys()): 44 | idx = coco_categories_mapping[class_idx] 45 | obj_mask = seg_predictions[0]['instances'].pred_masks[j] * 1. 46 | semantic_input[:, :, idx] += obj_mask.cpu().numpy() 47 | 48 | return semantic_input, img 49 | 50 | 51 | def compress_sem_map(sem_map): 52 | c_map = np.zeros((sem_map.shape[1], sem_map.shape[2])) 53 | for i in range(sem_map.shape[0]): 54 | c_map[sem_map[i] > 0.] 
= i + 1 55 | return c_map 56 | 57 | 58 | class ImageSegmentation(): 59 | def __init__(self, args): 60 | string_args = """ 61 | --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml 62 | --input input1.jpeg 63 | --confidence-threshold {} 64 | --opts MODEL.WEIGHTS 65 | detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl 66 | """.format(args.sem_pred_prob_thr) 67 | 68 | if args.sem_gpu_id == -2: 69 | string_args += """ MODEL.DEVICE cpu""" 70 | else: 71 | string_args += """ MODEL.DEVICE cuda:{}""".format(args.sem_gpu_id) 72 | 73 | string_args = string_args.split() 74 | 75 | args = get_seg_parser().parse_args(string_args) 76 | logger = setup_logger() 77 | logger.info("Arguments: " + str(args)) 78 | 79 | cfg = setup_cfg(args) 80 | self.demo = VisualizationDemo(cfg) 81 | 82 | def get_predictions(self, img, visualize=0): 83 | return self.demo.run_on_image(img, visualize=visualize) 84 | 85 | 86 | def setup_cfg(args): 87 | # load config from file and command-line arguments 88 | cfg = get_cfg() 89 | cfg.merge_from_file(args.config_file) 90 | cfg.merge_from_list(args.opts) 91 | # Set score_threshold for builtin models 92 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold 93 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold 94 | cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = \ 95 | args.confidence_threshold 96 | cfg.freeze() 97 | return cfg 98 | 99 | 100 | def get_seg_parser(): 101 | parser = argparse.ArgumentParser( 102 | description="Detectron2 demo for builtin models") 103 | parser.add_argument( 104 | "--config-file", 105 | default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", 106 | metavar="FILE", 107 | help="path to config file", 108 | ) 109 | parser.add_argument( 110 | "--webcam", 111 | action="store_true", 112 | help="Take inputs from webcam.") 113 | parser.add_argument("--video-input", help="Path to video file.") 114 | parser.add_argument( 115 | "--input", 116 | nargs="+", 117 | help="A list of space separated input images") 118 | parser.add_argument( 119 | "--output", 120 | help="A file or directory to save output visualizations. " 121 | "If not given, will show output in an OpenCV window.", 122 | ) 123 | 124 | parser.add_argument( 125 | "--confidence-threshold", 126 | type=float, 127 | default=0.5, 128 | help="Minimum score for instance predictions to be shown", 129 | ) 130 | parser.add_argument( 131 | "--opts", 132 | help="Modify config options using the command-line 'KEY VALUE' pairs", 133 | default=[], 134 | nargs=argparse.REMAINDER, 135 | ) 136 | return parser 137 | 138 | 139 | class VisualizationDemo(object): 140 | def __init__(self, cfg, instance_mode=ColorMode.IMAGE): 141 | """ 142 | Args: 143 | cfg (CfgNode): 144 | instance_mode (ColorMode): 145 | """ 146 | self.metadata = MetadataCatalog.get( 147 | cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" 148 | ) 149 | self.cpu_device = torch.device("cpu") 150 | self.instance_mode = instance_mode 151 | 152 | self.predictor = BatchPredictor(cfg) 153 | 154 | def run_on_image(self, image_list, visualize=0): 155 | """ 156 | Args: 157 | image (np.ndarray): an image of shape (H, W, C) (in BGR order). 158 | This is the format used by OpenCV. 159 | 160 | Returns: 161 | predictions (dict): the output of the model. 162 | vis_output (VisImage): the visualized image output. 
163 | """ 164 | vis_output = None 165 | all_predictions = self.predictor(image_list) 166 | # Convert image from OpenCV BGR format to Matplotlib RGB format. 167 | 168 | if visualize: 169 | predictions = all_predictions[0] 170 | image = image_list[0] 171 | visualizer = Visualizer( 172 | image, self.metadata, instance_mode=self.instance_mode) 173 | if "panoptic_seg" in predictions: 174 | panoptic_seg, segments_info = predictions["panoptic_seg"] 175 | vis_output = visualizer.draw_panoptic_seg_predictions( 176 | panoptic_seg.to(self.cpu_device), segments_info 177 | ) 178 | else: 179 | if "sem_seg" in predictions: 180 | vis_output = visualizer.draw_sem_seg( 181 | predictions["sem_seg"].argmax( 182 | dim=0).to(self.cpu_device) 183 | ) 184 | if "instances" in predictions: 185 | instances = predictions["instances"].to(self.cpu_device) 186 | vis_output = visualizer.draw_instance_predictions( 187 | predictions=instances) 188 | 189 | return all_predictions, vis_output 190 | 191 | 192 | class BatchPredictor: 193 | """ 194 | Create a simple end-to-end predictor with the given config that runs on 195 | single device for a list of input images. 196 | 197 | Compared to using the model directly, this class does the following 198 | additions: 199 | 200 | 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. 201 | 2. Always take BGR image as the input and apply conversion defined by 202 | `cfg.INPUT.FORMAT`. 203 | 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. 204 | 4. Take a list of input images 205 | 206 | Attributes: 207 | metadata (Metadata): the metadata of the underlying dataset, obtained 208 | from cfg.DATASETS.TEST. 209 | 210 | """ 211 | 212 | def __init__(self, cfg): 213 | self.cfg = cfg.clone() # cfg can be modified by model 214 | self.model = build_model(self.cfg) 215 | self.model.eval() 216 | self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) 217 | 218 | checkpointer = DetectionCheckpointer(self.model) 219 | checkpointer.load(cfg.MODEL.WEIGHTS) 220 | 221 | self.input_format = cfg.INPUT.FORMAT 222 | assert self.input_format in ["RGB", "BGR"], self.input_format 223 | 224 | def __call__(self, image_list): 225 | """ 226 | Args: 227 | image_list (list of np.ndarray): a list of images of 228 | shape (H, W, C) (in BGR order). 229 | 230 | Returns: 231 | predictions (dict): 232 | the output of the model for all images. 233 | See :doc:`/tutorials/models` for details about the format. 234 | """ 235 | inputs = [] 236 | for original_image in image_list: 237 | # https://github.com/sphinx-doc/sphinx/issues/4258 238 | # Apply pre-processing to image. 
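            # (input images are expected in BGR, as produced by OpenCV; note
            # that unlike detectron2's DefaultPredictor, no test-time resizing
            # is applied here, so frames keep their native resolution)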
239 | if self.input_format == "RGB": 240 | # whether the model expects BGR inputs or RGB 241 | original_image = original_image[:, :, ::-1] 242 | height, width = original_image.shape[:2] 243 | image = original_image 244 | image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) 245 | 246 | instance = {"image": image, "height": height, "width": width} 247 | 248 | inputs.append(instance) 249 | 250 | with torch.no_grad(): 251 | predictions = self.model(inputs) 252 | return predictions 253 | -------------------------------------------------------------------------------- /agents/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def get_contour_points(pos, origin, size=20): 6 | x, y, o = pos 7 | pt1 = (int(x) + origin[0], 8 | int(y) + origin[1]) 9 | pt2 = (int(x + size / 1.5 * np.cos(o + np.pi * 4 / 3)) + origin[0], 10 | int(y + size / 1.5 * np.sin(o + np.pi * 4 / 3)) + origin[1]) 11 | pt3 = (int(x + size * np.cos(o)) + origin[0], 12 | int(y + size * np.sin(o)) + origin[1]) 13 | pt4 = (int(x + size / 1.5 * np.cos(o - np.pi * 4 / 3)) + origin[0], 14 | int(y + size / 1.5 * np.sin(o - np.pi * 4 / 3)) + origin[1]) 15 | 16 | return np.array([pt1, pt2, pt3, pt4]) 17 | 18 | 19 | def draw_line(start, end, mat, steps=25, w=1): 20 | for i in range(steps + 1): 21 | x = int(np.rint(start[0] + (end[0] - start[0]) * i / steps)) 22 | y = int(np.rint(start[1] + (end[1] - start[1]) * i / steps)) 23 | mat[x - w:x + w, y - w:y + w] = 1 24 | return mat 25 | 26 | 27 | def init_vis_image(goal_name, legend): 28 | vis_image = np.ones((655, 1165, 3)).astype(np.uint8) * 255 29 | font = cv2.FONT_HERSHEY_SIMPLEX 30 | fontScale = 1 31 | color = (20, 20, 20) # BGR 32 | thickness = 2 33 | 34 | text = "Observations (Goal: {})".format(goal_name) 35 | textsize = cv2.getTextSize(text, font, fontScale, thickness)[0] 36 | textX = (640 - textsize[0]) // 2 + 15 37 | textY = (50 + textsize[1]) // 2 38 | vis_image = cv2.putText(vis_image, text, (textX, textY), 39 | font, fontScale, color, thickness, 40 | cv2.LINE_AA) 41 | 42 | text = "Predicted Semantic Map" 43 | textsize = cv2.getTextSize(text, font, fontScale, thickness)[0] 44 | textX = 640 + (480 - textsize[0]) // 2 + 30 45 | textY = (50 + textsize[1]) // 2 46 | vis_image = cv2.putText(vis_image, text, (textX, textY), 47 | font, fontScale, color, thickness, 48 | cv2.LINE_AA) 49 | 50 | # draw outlines 51 | color = [100, 100, 100] 52 | vis_image[49, 15:655] = color 53 | vis_image[49, 670:1150] = color 54 | vis_image[50:530, 14] = color 55 | vis_image[50:530, 655] = color 56 | vis_image[50:530, 669] = color 57 | vis_image[50:530, 1150] = color 58 | vis_image[530, 15:655] = color 59 | vis_image[530, 670:1150] = color 60 | 61 | # draw legend 62 | lx, ly, _ = legend.shape 63 | vis_image[537:537 + lx, 155:155 + ly, :] = legend 64 | 65 | return vis_image 66 | -------------------------------------------------------------------------------- /algo/__init__.py: -------------------------------------------------------------------------------- 1 | from .ppo import PPO 2 | -------------------------------------------------------------------------------- /algo/ppo.py: -------------------------------------------------------------------------------- 1 | # The following code is largely borrowed from: 2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/algo/ppo.py 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | 8 | 9 | 
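# The update() method below implements the PPO clipped surrogate objective:
#     ratio r_t = exp(log pi(a_t | s_t) - log pi_old(a_t | s_t))
#     policy loss = -E_t[min(r_t * A_t,
#                            clip(r_t, 1 - clip_param, 1 + clip_param) * A_t)]
# Advantages A_t are normalized once per update, the value loss is optionally
# clipped around the old value predictions, and an entropy bonus weighted by
# entropy_coef encourages exploration.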
class PPO(): 10 | 11 | def __init__( 12 | self, 13 | actor_critic, 14 | clip_param, 15 | ppo_epoch, 16 | num_mini_batch, 17 | value_loss_coef, 18 | entropy_coef, 19 | lr=None, 20 | eps=None, 21 | max_grad_norm=None, 22 | use_clipped_value_loss=True): 23 | 24 | self.actor_critic = actor_critic 25 | 26 | self.clip_param = clip_param 27 | self.ppo_epoch = ppo_epoch 28 | self.num_mini_batch = num_mini_batch 29 | 30 | self.value_loss_coef = value_loss_coef 31 | self.entropy_coef = entropy_coef 32 | 33 | self.max_grad_norm = max_grad_norm 34 | self.use_clipped_value_loss = use_clipped_value_loss 35 | 36 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, 37 | actor_critic.parameters()), 38 | lr=lr, eps=eps) 39 | 40 | def update(self, rollouts): 41 | advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1] 42 | advantages = (advantages - advantages.mean()) / ( 43 | advantages.std() + 1e-5) 44 | 45 | value_loss_epoch = 0 46 | action_loss_epoch = 0 47 | dist_entropy_epoch = 0 48 | 49 | for _ in range(self.ppo_epoch): 50 | 51 | if self.actor_critic.is_recurrent: 52 | data_generator = rollouts.recurrent_generator( 53 | advantages, self.num_mini_batch) 54 | else: 55 | data_generator = rollouts.feed_forward_generator( 56 | advantages, self.num_mini_batch) 57 | 58 | for sample in data_generator: 59 | 60 | value_preds = sample['value_preds'] 61 | returns = sample['returns'] 62 | adv_targ = sample['adv_targ'] 63 | 64 | # Reshape to do in a single forward pass for all steps 65 | values, action_log_probs, dist_entropy, _ = \ 66 | self.actor_critic.evaluate_actions( 67 | sample['obs'], sample['rec_states'], 68 | sample['masks'], sample['actions'], 69 | extras=sample['extras'] 70 | ) 71 | 72 | ratio = torch.exp(action_log_probs - 73 | sample['old_action_log_probs']) 74 | surr1 = ratio * adv_targ 75 | surr2 = torch.clamp(ratio, 1.0 - self.clip_param, 76 | 1.0 + self.clip_param) * adv_targ 77 | action_loss = -torch.min(surr1, surr2).mean() 78 | 79 | if self.use_clipped_value_loss: 80 | value_pred_clipped = value_preds + \ 81 | (values - value_preds).clamp( 82 | -self.clip_param, self.clip_param) 83 | value_losses = (values - returns).pow(2) 84 | value_losses_clipped = (value_pred_clipped 85 | - returns).pow(2) 86 | value_loss = .5 * torch.max(value_losses, 87 | value_losses_clipped).mean() 88 | else: 89 | value_loss = 0.5 * (returns - values).pow(2).mean() 90 | 91 | self.optimizer.zero_grad() 92 | (value_loss * self.value_loss_coef + action_loss - 93 | dist_entropy * self.entropy_coef).backward() 94 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), 95 | self.max_grad_norm) 96 | self.optimizer.step() 97 | 98 | value_loss_epoch += value_loss.item() 99 | action_loss_epoch += action_loss.item() 100 | dist_entropy_epoch += dist_entropy.item() 101 | 102 | num_updates = self.ppo_epoch * self.num_mini_batch 103 | 104 | value_loss_epoch /= num_updates 105 | action_loss_epoch /= num_updates 106 | dist_entropy_epoch /= num_updates 107 | 108 | return value_loss_epoch, action_loss_epoch, dist_entropy_epoch 109 | -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | 5 | def get_args(): 6 | parser = argparse.ArgumentParser( 7 | description='Goal-Oriented-Semantic-Exploration') 8 | 9 | # General Arguments 10 | parser.add_argument('--seed', type=int, default=1, 11 | help='random seed (default: 1)') 12 | 
parser.add_argument('--auto_gpu_config', type=int, default=1) 13 | parser.add_argument('--total_num_scenes', type=str, default="auto") 14 | parser.add_argument('-n', '--num_processes', type=int, default=5, 15 | help="""how many training processes to use (default:5) 16 | Overridden when auto_gpu_config=1 17 | and training on gpus""") 18 | parser.add_argument('--num_processes_per_gpu', type=int, default=6) 19 | parser.add_argument('--num_processes_on_first_gpu', type=int, default=1) 20 | parser.add_argument('--eval', type=int, default=0, 21 | help='0: Train, 1: Evaluate (default: 0)') 22 | parser.add_argument('--num_training_frames', type=int, default=10000000, 23 | help='total number of training frames') 24 | parser.add_argument('--num_eval_episodes', type=int, default=200, 25 | help="number of test episodes per scene") 26 | parser.add_argument('--num_train_episodes', type=int, default=10000, 27 | help="""number of train episodes per scene 28 | before loading the next scene""") 29 | parser.add_argument('--no_cuda', action='store_true', default=False, 30 | help='disables CUDA training') 31 | parser.add_argument("--sim_gpu_id", type=int, default=0, 32 | help="gpu id on which scenes are loaded") 33 | parser.add_argument("--sem_gpu_id", type=int, default=-1, 34 | help="""gpu id for semantic model, 35 | -1: same as sim gpu, -2: cpu""") 36 | 37 | # Logging, loading models, visualization 38 | parser.add_argument('--log_interval', type=int, default=10, 39 | help="""log interval, one log per n updates 40 | (default: 10) """) 41 | parser.add_argument('--save_interval', type=int, default=1, 42 | help="""save interval""") 43 | parser.add_argument('-d', '--dump_location', type=str, default="./tmp/", 44 | help='path to dump models and log (default: ./tmp/)') 45 | parser.add_argument('--exp_name', type=str, default="exp1", 46 | help='experiment name (default: exp1)') 47 | parser.add_argument('--save_periodic', type=int, default=500000, 48 | help='Model save frequency in number of updates') 49 | parser.add_argument('--load', type=str, default="0", 50 | help="""model path to load, 51 | 0 to not reload (default: 0)""") 52 | parser.add_argument('-v', '--visualize', type=int, default=0, 53 | help="""1: Render the observation and 54 | the predicted semantic map, 55 | 2: Render the observation with semantic 56 | predictions and the predicted semantic map 57 | (default: 0)""") 58 | parser.add_argument('--print_images', type=int, default=0, 59 | help='1: save visualization as images') 60 | 61 | # Environment, dataset and episode specifications 62 | parser.add_argument('-efw', '--env_frame_width', type=int, default=640, 63 | help='Frame width (default:640)') 64 | parser.add_argument('-efh', '--env_frame_height', type=int, default=480, 65 | help='Frame height (default:480)') 66 | parser.add_argument('-fw', '--frame_width', type=int, default=160, 67 | help='Frame width (default:160)') 68 | parser.add_argument('-fh', '--frame_height', type=int, default=120, 69 | help='Frame height (default:120)') 70 | parser.add_argument('-el', '--max_episode_length', type=int, default=500, 71 | help="""Maximum episode length""") 72 | parser.add_argument("--task_config", type=str, 73 | default="tasks/objectnav_gibson.yaml", 74 | help="path to config yaml containing task information") 75 | parser.add_argument("--split", type=str, default="train", 76 | help="dataset split (train | val | val_mini) ") 77 | parser.add_argument('--camera_height', type=float, default=0.88, 78 | help="agent camera height in metres") 79 | 
parser.add_argument('--hfov', type=float, default=79.0,
80 |                         help="horizontal field of view in degrees")
81 |     parser.add_argument('--turn_angle', type=float, default=30,
82 |                         help="Agent turn angle in degrees")
83 |     parser.add_argument('--min_depth', type=float, default=0.5,
84 |                         help="Minimum depth for depth sensor in meters")
85 |     parser.add_argument('--max_depth', type=float, default=5.0,
86 |                         help="Maximum depth for depth sensor in meters")
87 |     parser.add_argument('--success_dist', type=float, default=1.0,
88 |                         help="success distance threshold in meters")
89 |     parser.add_argument('--floor_thr', type=int, default=50,
90 |                         help="floor threshold in cm")
91 |     parser.add_argument('--min_d', type=float, default=1.5,
92 |                         help="min distance to goal during training in meters")
93 |     parser.add_argument('--max_d', type=float, default=100.0,
94 |                         help="max distance to goal during training in meters")
95 |     parser.add_argument('--version', type=str, default="v1.1",
96 |                         help="dataset version")
97 |
98 |     # Model Hyperparameters
99 |     parser.add_argument('--agent', type=str, default="sem_exp")
100 |     parser.add_argument('--lr', type=float, default=2.5e-5,
101 |                         help='learning rate (default: 2.5e-5)')
102 |     parser.add_argument('--global_hidden_size', type=int, default=256,
103 |                         help='global_hidden_size')
104 |     parser.add_argument('--eps', type=float, default=1e-5,
105 |                         help='RL Optimizer epsilon (default: 1e-5)')
106 |     parser.add_argument('--alpha', type=float, default=0.99,
107 |                         help='RL Optimizer alpha (default: 0.99)')
108 |     parser.add_argument('--gamma', type=float, default=0.99,
109 |                         help='discount factor for rewards (default: 0.99)')
110 |     parser.add_argument('--use_gae', action='store_true', default=False,
111 |                         help='use generalized advantage estimation')
112 |     parser.add_argument('--tau', type=float, default=0.95,
113 |                         help='gae parameter (default: 0.95)')
114 |     parser.add_argument('--entropy_coef', type=float, default=0.001,
115 |                         help='entropy term coefficient (default: 0.001)')
116 |     parser.add_argument('--value_loss_coef', type=float, default=0.5,
117 |                         help='value loss coefficient (default: 0.5)')
118 |     parser.add_argument('--max_grad_norm', type=float, default=0.5,
119 |                         help='max norm of gradients (default: 0.5)')
120 |     parser.add_argument('--num_global_steps', type=int, default=20,
121 |                         help='number of global policy rollout steps (default: 20)')
122 |     parser.add_argument('--ppo_epoch', type=int, default=4,
123 |                         help='number of ppo epochs (default: 4)')
124 |     parser.add_argument('--num_mini_batch', type=str, default="auto",
125 |                         help='number of mini batches for ppo (default: auto)')
126 |     parser.add_argument('--clip_param', type=float, default=0.2,
127 |                         help='ppo clip parameter (default: 0.2)')
128 |     parser.add_argument('--use_recurrent_global', type=int, default=0,
129 |                         help='use a recurrent global policy')
130 |     parser.add_argument('--num_local_steps', type=int, default=25,
131 |                         help="""Number of local policy steps
132 |                                 between each global step""")
133 |     parser.add_argument('--reward_coeff', type=float, default=0.1,
134 |                         help="Object goal reward coefficient")
135 |     parser.add_argument('--intrinsic_rew_coeff', type=float, default=0.02,
136 |                         help="intrinsic exploration reward coefficient")
137 |     parser.add_argument('--num_sem_categories', type=float, default=16)
138 |     parser.add_argument('--sem_pred_prob_thr', type=float, default=0.9,
139 |                         help="Semantic prediction confidence threshold")
140 |
141 |     # Mapping
142 |     parser.add_argument('--global_downscaling', type=int, default=2)
143
| parser.add_argument('--vision_range', type=int, default=100) 144 | parser.add_argument('--map_resolution', type=int, default=5) 145 | parser.add_argument('--du_scale', type=int, default=1) 146 | parser.add_argument('--map_size_cm', type=int, default=2400) 147 | parser.add_argument('--cat_pred_threshold', type=float, default=5.0) 148 | parser.add_argument('--map_pred_threshold', type=float, default=1.0) 149 | parser.add_argument('--exp_pred_threshold', type=float, default=1.0) 150 | parser.add_argument('--collision_threshold', type=float, default=0.20) 151 | 152 | # parse arguments 153 | args = parser.parse_args() 154 | 155 | args.cuda = not args.no_cuda and torch.cuda.is_available() 156 | 157 | if args.cuda: 158 | if args.auto_gpu_config: 159 | num_gpus = torch.cuda.device_count() 160 | if args.total_num_scenes != "auto": 161 | args.total_num_scenes = int(args.total_num_scenes) 162 | elif "objectnav_gibson" in args.task_config and \ 163 | "train" in args.split: 164 | args.total_num_scenes = 25 165 | elif "objectnav_gibson" in args.task_config and \ 166 | "val" in args.split: 167 | args.total_num_scenes = 5 168 | else: 169 | assert False, "Unknown task config, please specify" + \ 170 | " total_num_scenes" 171 | 172 | # GPU Memory required for the SemExp model: 173 | # 0.8 + 0.4 * args.total_num_scenes (GB) 174 | # GPU Memory required per thread: 2.6 (GB) 175 | min_memory_required = max(0.8 + 0.4 * args.total_num_scenes, 2.6) 176 | # Automatically configure number of training threads based on 177 | # number of GPUs available and GPU memory size 178 | gpu_memory = 1000 179 | for i in range(num_gpus): 180 | gpu_memory = min(gpu_memory, 181 | torch.cuda.get_device_properties( 182 | i).total_memory 183 | / 1024 / 1024 / 1024) 184 | assert gpu_memory > min_memory_required, \ 185 | """Insufficient GPU memory for GPU {}, gpu memory ({}GB) 186 | needs to be greater than {}GB""".format( 187 | i, gpu_memory, min_memory_required) 188 | 189 | num_processes_per_gpu = int(gpu_memory / 2.6) 190 | num_processes_on_first_gpu = \ 191 | int((gpu_memory - min_memory_required) / 2.6) 192 | 193 | if args.eval: 194 | max_threads = num_processes_per_gpu * (num_gpus - 1) \ 195 | + num_processes_on_first_gpu 196 | assert max_threads >= args.total_num_scenes, \ 197 | """Insufficient GPU memory for evaluation""" 198 | 199 | if num_gpus == 1: 200 | args.num_processes_on_first_gpu = num_processes_on_first_gpu 201 | args.num_processes_per_gpu = 0 202 | args.num_processes = num_processes_on_first_gpu 203 | assert args.num_processes > 0, "Insufficient GPU memory" 204 | else: 205 | num_threads = num_processes_per_gpu * (num_gpus - 1) \ 206 | + num_processes_on_first_gpu 207 | num_threads = min(num_threads, args.total_num_scenes) 208 | args.num_processes_per_gpu = num_processes_per_gpu 209 | args.num_processes_on_first_gpu = max( 210 | 0, 211 | num_threads - args.num_processes_per_gpu * (num_gpus - 1)) 212 | args.num_processes = num_threads 213 | 214 | args.sim_gpu_id = 1 215 | 216 | print("Auto GPU config:") 217 | print("Number of processes: {}".format(args.num_processes)) 218 | print("Number of processes on GPU 0: {}".format( 219 | args.num_processes_on_first_gpu)) 220 | print("Number of processes per GPU: {}".format( 221 | args.num_processes_per_gpu)) 222 | else: 223 | args.sem_gpu_id = -2 224 | 225 | if args.num_mini_batch == "auto": 226 | args.num_mini_batch = max(args.num_processes // 2, 1) 227 | else: 228 | args.num_mini_batch = int(args.num_mini_batch) 229 | 230 | return args 231 | 
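# Worked example of the auto GPU config above (illustrative numbers, assuming
# two GPUs whose reported memory is exactly 12 GB, objectnav_gibson train
# split):
#     total_num_scenes = 25
#     min_memory_required = max(0.8 + 0.4 * 25, 2.6) = 10.8 GB
#     num_processes_per_gpu = int(12 / 2.6) = 4
#     num_processes_on_first_gpu = int((12 - 10.8) / 2.6) = 0
#     num_threads = min(4 * (2 - 1) + 0, 25) = 4  -> args.num_processes = 4
#     sim_gpu_id is set to 1, so GPU 0 holds the SemExp model and GPU 1 runs
#     the simulator threads.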
-------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /constants.py: -------------------------------------------------------------------------------- 1 | scenes = {} 2 | scenes["train"] = [ 3 | 'Allensville', 4 | 'Beechwood', 5 | 'Benevolence', 6 | 'Coffeen', 7 | 'Cosmos', 8 | 'Forkland', 9 | 'Hanson', 10 | 'Hiteman', 11 | 'Klickitat', 12 | 'Lakeville', 13 | 'Leonardo', 14 | 'Lindenwood', 15 | 'Marstons', 16 | 'Merom', 17 | 'Mifflinburg', 18 | 'Newfields', 19 | 'Onaga', 20 | 'Pinesdale', 21 | 'Pomaria', 22 | 'Ranchester', 23 | 'Shelbyville', 24 | 'Stockman', 25 | 'Tolstoy', 26 | 'Wainscott', 27 | 'Woodbine', 28 | ] 29 | 30 | scenes["val"] = [ 31 | 'Collierville', 32 | 'Corozal', 33 | 'Darden', 34 | 'Markleeville', 35 | 'Wiconisco', 36 | ] 37 | 38 | coco_categories = { 39 | "chair": 0, 40 | "couch": 1, 41 | "potted plant": 2, 42 | "bed": 3, 43 | "toilet": 4, 44 | "tv": 5, 45 | "dining-table": 6, 46 | "oven": 7, 47 | "sink": 8, 48 | "refrigerator": 9, 49 | "book": 10, 50 | "clock": 11, 51 | "vase": 12, 52 | "cup": 13, 53 | "bottle": 14 54 | } 55 | 56 | coco_categories_mapping = { 57 | 56: 0, # chair 58 | 57: 1, # couch 59 | 58: 2, # potted plant 60 | 59: 3, # bed 61 | 61: 4, # toilet 62 | 62: 5, # tv 63 | 60: 6, # dining-table 64 | 69: 7, # oven 65 | 71: 8, # sink 66 | 72: 9, # refrigerator 67 | 73: 10, # book 68 | 74: 11, # clock 69 | 75: 12, # vase 70 | 41: 13, # cup 71 | 39: 14, # bottle 72 | } 73 | 74 | color_palette = [ 75 | 1.0, 1.0, 1.0, 76 | 0.6, 0.6, 0.6, 
77 | 0.95, 0.95, 0.95, 78 | 0.96, 0.36, 0.26, 79 | 0.12156862745098039, 0.47058823529411764, 0.7058823529411765, 80 | 0.9400000000000001, 0.7818, 0.66, 81 | 0.9400000000000001, 0.8868, 0.66, 82 | 0.8882000000000001, 0.9400000000000001, 0.66, 83 | 0.7832000000000001, 0.9400000000000001, 0.66, 84 | 0.6782000000000001, 0.9400000000000001, 0.66, 85 | 0.66, 0.9400000000000001, 0.7468000000000001, 86 | 0.66, 0.9400000000000001, 0.8518000000000001, 87 | 0.66, 0.9232, 0.9400000000000001, 88 | 0.66, 0.8182, 0.9400000000000001, 89 | 0.66, 0.7132, 0.9400000000000001, 90 | 0.7117999999999999, 0.66, 0.9400000000000001, 91 | 0.8168, 0.66, 0.9400000000000001, 92 | 0.9218, 0.66, 0.9400000000000001, 93 | 0.9400000000000001, 0.66, 0.8531999999999998, 94 | 0.9400000000000001, 0.66, 0.748199999999999] 95 | -------------------------------------------------------------------------------- /docs/DOCKER_INSTRUCTIONS.md: -------------------------------------------------------------------------------- 1 | # Docker and Singularity Instructions: 2 | We provide experimental [Docker](https://www.docker.com/) and [Singularity](https://sylabs.io/) images with all the dependencies installed. 3 | 4 | Before setting up Docker, pull the code using: 5 | ``` 6 | git clone https://github.com/devendrachaplot/Object-Goal-Navigation/ 7 | cd Object-Goal-Navigation/; 8 | ``` 9 | Download and set up the scene and episode datasets as described [here](README.md#setup). 10 | 11 | For Docker, either build the image using the provided [Dockerfile](./Dockerfile): 12 | ``` 13 | docker build -t devendrachaplot/habitat:sem_exp . 14 | ``` 15 | Or pull the image from Docker Hub: 16 | ``` 17 | docker pull devendrachaplot/habitat:sem_exp 18 | ``` 19 | 20 | After building or pulling the image, start a container using: 21 | ``` 22 | docker run -v $(pwd)/:/code -v $(pwd)/data:/code/data --runtime=nvidia -it devendrachaplot/habitat:sem_exp 23 | ``` 24 | 25 | Inside the container, check Habitat's compatibility with your system: 26 | ``` 27 | cd /habitat-api/ 28 | python examples/benchmark.py 29 | ``` 30 | 31 | To run the SemExp model inside the container, `cd /code/` and run the same commands as described in [INSTRUCTIONS](./INSTRUCTIONS.md). 32 | 33 | To pull the Singularity image: 34 | ``` 35 | singularity pull docker://devendrachaplot/habitat:sem_exp 36 | ``` -------------------------------------------------------------------------------- /docs/INSTRUCTIONS.md: -------------------------------------------------------------------------------- 1 | # Instructions 2 | 3 | ## Training 4 | To train the SemExp model on the Object Goal Navigation task: 5 | ``` 6 | python main.py 7 | ``` 8 | 9 | ### Specifying number of threads 10 | The code runs multiple parallel threads for training. Each thread loads a scene on a GPU. The code automatically decides the total number of threads and the number of threads on each GPU based on the available GPUs. 11 | 12 | If you would prefer not to use the auto GPU config, you need to specify the following: 13 | ``` 14 | --auto_gpu_config 0 15 | -n, --num_processes NUM_PROCESSES 16 | --num_processes_per_gpu NUM_PROCESSES_PER_GPU 17 | --num_processes_on_first_gpu NUM_PROCESSES_ON_FIRST_GPU 18 | ``` 19 | `NUM_PROCESSES_PER_GPU` depends on your GPU memory; 6 works well for 16GB GPUs. 20 | `NUM_PROCESSES_ON_FIRST_GPU` specifies the number of processes on the first GPU in addition to the SemExp model; 1 works well for 16GB GPUs.
21 | `NUM_PROCESSES` depends on the number of GPUs used for training and `NUM_PROCESSES_PER_GPU`, such that 22 | ``` 23 | NUM_PROCESSES <= min(NUM_PROCESSES_PER_GPU * number of GPUs + NUM_PROCESSES_ON_FIRST_GPU, 25) 24 | ``` 25 | The Gibson training set consists of 25 scenes. 26 | 27 | For example, to train the model on 5 GPUs with 16GB memory per GPU: 28 | ``` 29 | python main.py --auto_gpu_config 0 -n 25 --num_processes_per_gpu 6 --num_processes_on_first_gpu 1 --sim_gpu_id 1 30 | ``` 31 | Here, `sim_gpu_id = 1` specifies that simulator threads run on GPUs 1 onwards. 32 | GPUs 1 to 4 will run 6 threads each, and GPU 0 will run 1 thread plus 33 | the SemExp model. 34 | 35 | ### Specifying log location, periodic model dumps 36 | ``` 37 | python main.py -d saved/ --exp_name exp1 --save_periodic 500000 38 | ``` 39 | The above will save the best model files and the training log at `saved/models/exp1/` and periodically save all models every 500000 steps at `saved/dump/exp1/`. Each module will be saved in a separate file. 40 | 41 | ### Hyper-parameters 42 | Most of the default hyper-parameters should work fine. Some hyper-parameters are set for training with 25 threads and might need to be tuned when using fewer threads. Fewer threads lead to a smaller batch size, so the learning rate might need to be tuned using `--lr`. 43 | 44 | ## Downloading pre-trained models 45 | ``` 46 | mkdir pretrained_models; 47 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=171ZA7XNu5vi3XLpuKs8DuGGZrYyuSjL0' -O pretrained_models/sem_exp.pth 48 | ``` 49 | 50 | ## Evaluation 51 | 52 | The following are instructions for evaluating the model on the Gibson val set. 53 | 54 | To evaluate the pre-trained model: 55 | ``` 56 | python main.py --split val --eval 1 --load pretrained_models/sem_exp.pth 57 | ``` 58 | 59 | The pre-trained model should get 0.657 Success, 0.339 SPL, and 1.474 DTG. 60 | 61 | ### Manual GPU config 62 | 63 | If you would prefer not to use the auto GPU config, specify the number of threads for evaluation using `--num_processes` and the number of evaluation episodes per thread using `--num_eval_episodes`. 64 | The Gibson val set consists of 5 scenes with 200 episodes per scene, so we need 5 threads for evaluation and 200 episodes per thread. Split the 5 scenes across GPUs based on your GPU memory sizes. The code requires `0.8 + 0.4 * num_scenes` GB of GPU memory on the first GPU for the model and around 2.6GB of memory per scene; a worked check of these numbers is sketched below. 65 | 66 | For example, if you have 1 GPU with 16GB memory: 67 | ``` 68 | python main.py --split val --eval 1 --auto_gpu_config 0 \ 69 | -n 5 --num_eval_episodes 200 --num_processes_on_first_gpu 5 \ 70 | --load pretrained_models/sem_exp.pth 71 | ``` 72 | or if you have 2 GPUs with 12GB memory each: 73 | ``` 74 | python main.py --split val --eval 1 --auto_gpu_config 0 \ 75 | -n 5 --num_eval_episodes 200 --num_processes_on_first_gpu 1 \ 76 | --num_processes_per_gpu 4 --sim_gpu_id 1 \ 77 | --load pretrained_models/sem_exp.pth 78 | ``` 79 | 80 | ### Visualization and printing images 81 | To visualize the agent observations and the predicted map and pose, add `-v 1` as an argument to the above command. This requires a display to be attached to the system. 82 | 83 | To visualize on headless systems (without a display), use `--print_images 1 -d results/ --exp_name exp1`. This will save the visualization images in `results/dump/exp1/episodes/`.
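As a quick check of the memory arithmetic in the manual GPU config section above, a minimal sketch (the helper below is hypothetical, not part of the repository):

```python
# GPU 0 holds the model (0.8 + 0.4 * num_scenes GB) plus 2.6 GB per scene
# assigned to it; every other GPU only needs 2.6 GB per scene it runs.
def first_gpu_memory_gb(num_scenes, scenes_on_first_gpu):
    return 0.8 + 0.4 * num_scenes + 2.6 * scenes_on_first_gpu


print(first_gpu_memory_gb(5, 5))  # 15.8 GB -> all 5 scenes fit one 16 GB GPU
print(first_gpu_memory_gb(5, 1))  # 5.4 GB on GPU 0; the remaining 4 scenes
                                  # need 4 * 2.6 = 10.4 GB on a 12 GB GPU 1
```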
84 | 85 | Both `-v 1` and `--print_images 1` can be used together to visualize and print images at the same time. 86 | 87 | 88 | ## Notes 89 | 90 | - Training the model for 10 million frames with 25 threads takes around 2.5 days on an Nvidia DGX-1 system using 5 16GB GPUs, but the model provides good performance even with only 1 million frames (~6 hrs) of training. 91 | 92 | - Evaluating the model on the val set for 1000 episodes with 5 threads takes around 2.5 hrs on an Nvidia DGX-1 system. 93 | 94 | - The code does not contain the Denoising Network described in our [paper](https://arxiv.org/pdf/2007.00643.pdf). 95 | This is for the following reasons: 96 | - Training the Denoising Network requires downloading the original Gibson dataset (in non-Habitat format) and the 3DSceneGraph dataset, and building Habitat-format semantic scenes using both datasets. 97 | - Training the Denoising Network requires building and cleaning top-down maps, which makes training much slower. 98 | - The first-person semantic annotations for Gibson are not perfectly accurate; they do not align with the depth sensor. As a result, the Denoising Network provides only a marginal performance improvement. 99 | 100 | 101 | ## Tips 102 | To silence the Habitat sim log, add the following to your `~/.bashrc` (Linux) or `~/.bash_profile` (Mac): 103 | ``` 104 | export GLOG_minloglevel=2 105 | export MAGNUM_LOG="quiet" 106 | ``` -------------------------------------------------------------------------------- /docs/example.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/example.gif -------------------------------------------------------------------------------- /docs/legend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/legend.png -------------------------------------------------------------------------------- /docs/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/overview.jpg -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .habitat import construct_envs 4 | 5 | 6 | def make_vec_envs(args): 7 | envs = construct_envs(args) 8 | envs = VecPyTorch(envs, args.device) 9 | return envs 10 | 11 | 12 | # Adapted from 13 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/envs.py#L159 14 | class VecPyTorch(): 15 | 16 | def __init__(self, venv, device): 17 | self.venv = venv 18 | self.num_envs = venv.num_envs 19 | self.observation_space = venv.observation_space 20 | self.action_space = venv.action_space 21 | self.device = device 22 | 23 | def reset(self): 24 | obs, info = self.venv.reset() 25 | obs = torch.from_numpy(obs).float().to(self.device) 26 | return obs, info 27 | 28 | def step_async(self, actions): 29 | actions = actions.cpu().numpy() 30 | self.venv.step_async(actions) 31 | 32 | def step_wait(self): 33 | obs, reward, done, info = self.venv.step_wait() 34 | obs = torch.from_numpy(obs).float().to(self.device) 35 | reward = torch.from_numpy(reward).float() 36 | return
obs, reward, done, info 37 | 38 | def step(self, actions): 39 | actions = actions.cpu().numpy() 40 | obs, reward, done, info = self.venv.step(actions) 41 | obs = torch.from_numpy(obs).float().to(self.device) 42 | reward = torch.from_numpy(reward).float() 43 | return obs, reward, done, info 44 | 45 | def get_rewards(self, inputs): 46 | reward = self.venv.get_rewards(inputs) 47 | reward = torch.from_numpy(reward).float() 48 | return reward 49 | 50 | def plan_act_and_preprocess(self, inputs): 51 | obs, reward, done, info = self.venv.plan_act_and_preprocess(inputs) 52 | obs = torch.from_numpy(obs).float().to(self.device) 53 | reward = torch.from_numpy(reward).float() 54 | return obs, reward, done, info 55 | 56 | def close(self): 57 | return self.venv.close() 58 | -------------------------------------------------------------------------------- /envs/habitat/__init__.py: -------------------------------------------------------------------------------- 1 | # Parts of the code in this file have been borrowed from: 2 | # https://github.com/facebookresearch/habitat-api 3 | import os 4 | import numpy as np 5 | import torch 6 | from habitat.config.default import get_config as cfg_env 7 | from habitat.datasets.pointnav.pointnav_dataset import PointNavDatasetV1 8 | from habitat import Config, Env, RLEnv, VectorEnv, make_dataset 9 | 10 | from agents.sem_exp import Sem_Exp_Env_Agent 11 | from .objectgoal_env import ObjectGoal_Env 12 | 13 | from .utils.vector_env import VectorEnv 14 | 15 | 16 | def make_env_fn(args, config_env, rank): 17 | dataset = make_dataset(config_env.DATASET.TYPE, config=config_env.DATASET) 18 | config_env.defrost() 19 | config_env.SIMULATOR.SCENE = dataset.episodes[0].scene_id 20 | config_env.freeze() 21 | 22 | if args.agent == "sem_exp": 23 | env = Sem_Exp_Env_Agent(args=args, rank=rank, 24 | config_env=config_env, 25 | dataset=dataset 26 | ) 27 | else: 28 | env = ObjectGoal_Env(args=args, rank=rank, 29 | config_env=config_env, 30 | dataset=dataset 31 | ) 32 | 33 | env.seed(rank) 34 | return env 35 | 36 | 37 | def _get_scenes_from_folder(content_dir): 38 | scene_dataset_ext = ".glb.json.gz" 39 | scenes = [] 40 | for filename in os.listdir(content_dir): 41 | if filename.endswith(scene_dataset_ext): 42 | scene = filename[: -len(scene_dataset_ext) + 4] 43 | scenes.append(scene) 44 | scenes.sort() 45 | return scenes 46 | 47 | 48 | def construct_envs(args): 49 | env_configs = [] 50 | args_list = [] 51 | 52 | basic_config = cfg_env(config_paths=["envs/habitat/configs/" 53 | + args.task_config]) 54 | basic_config.defrost() 55 | basic_config.DATASET.SPLIT = args.split 56 | basic_config.DATASET.DATA_PATH = \ 57 | basic_config.DATASET.DATA_PATH.replace("v1", args.version) 58 | basic_config.DATASET.EPISODES_DIR = \ 59 | basic_config.DATASET.EPISODES_DIR.replace("v1", args.version) 60 | basic_config.freeze() 61 | 62 | scenes = basic_config.DATASET.CONTENT_SCENES 63 | if "*" in basic_config.DATASET.CONTENT_SCENES: 64 | content_dir = os.path.join(basic_config.DATASET.EPISODES_DIR.format( 65 | split=args.split), "content") 66 | scenes = _get_scenes_from_folder(content_dir) 67 | 68 | if len(scenes) > 0: 69 | assert len(scenes) >= args.num_processes, ( 70 | "reduce the number of processes as there " 71 | "aren't enough scenes" 72 | ) 73 | 74 | scene_split_sizes = [int(np.floor(len(scenes) / args.num_processes)) 75 | for _ in range(args.num_processes)] 76 | for i in range(len(scenes) % args.num_processes): 77 | scene_split_sizes[i] += 1 78 | 79 | print("Scenes per thread:") 80 | for i in
range(args.num_processes): 81 | config_env = cfg_env(config_paths=["envs/habitat/configs/" 82 | + args.task_config]) 83 | config_env.defrost() 84 | 85 | if len(scenes) > 0: 86 | config_env.DATASET.CONTENT_SCENES = scenes[ 87 | sum(scene_split_sizes[:i]): 88 | sum(scene_split_sizes[:i + 1]) 89 | ] 90 | print("Thread {}: {}".format(i, config_env.DATASET.CONTENT_SCENES)) 91 | 92 | if i < args.num_processes_on_first_gpu: 93 | gpu_id = 0 94 | else: 95 | gpu_id = int((i - args.num_processes_on_first_gpu) 96 | // args.num_processes_per_gpu) + args.sim_gpu_id 97 | gpu_id = min(torch.cuda.device_count() - 1, gpu_id) 98 | config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_id 99 | 100 | agent_sensors = [] 101 | agent_sensors.append("RGB_SENSOR") 102 | agent_sensors.append("DEPTH_SENSOR") 103 | # agent_sensors.append("SEMANTIC_SENSOR") 104 | 105 | config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors 106 | 107 | # Resetting episodes manually, setting a high max episode length in sim 108 | config_env.ENVIRONMENT.MAX_EPISODE_STEPS = 10000000 109 | config_env.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False 110 | 111 | config_env.SIMULATOR.RGB_SENSOR.WIDTH = args.env_frame_width 112 | config_env.SIMULATOR.RGB_SENSOR.HEIGHT = args.env_frame_height 113 | config_env.SIMULATOR.RGB_SENSOR.HFOV = args.hfov 114 | config_env.SIMULATOR.RGB_SENSOR.POSITION = [0, args.camera_height, 0] 115 | 116 | config_env.SIMULATOR.DEPTH_SENSOR.WIDTH = args.env_frame_width 117 | config_env.SIMULATOR.DEPTH_SENSOR.HEIGHT = args.env_frame_height 118 | config_env.SIMULATOR.DEPTH_SENSOR.HFOV = args.hfov 119 | config_env.SIMULATOR.DEPTH_SENSOR.MIN_DEPTH = args.min_depth 120 | config_env.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = args.max_depth 121 | config_env.SIMULATOR.DEPTH_SENSOR.POSITION = [0, args.camera_height, 0] 122 | 123 | # config_env.SIMULATOR.SEMANTIC_SENSOR.WIDTH = args.env_frame_width 124 | # config_env.SIMULATOR.SEMANTIC_SENSOR.HEIGHT = args.env_frame_height 125 | # config_env.SIMULATOR.SEMANTIC_SENSOR.HFOV = args.hfov 126 | # config_env.SIMULATOR.SEMANTIC_SENSOR.POSITION = \ 127 | # [0, args.camera_height, 0] 128 | 129 | config_env.SIMULATOR.TURN_ANGLE = args.turn_angle 130 | config_env.DATASET.SPLIT = args.split 131 | config_env.DATASET.DATA_PATH = \ 132 | config_env.DATASET.DATA_PATH.replace("v1", args.version) 133 | config_env.DATASET.EPISODES_DIR = \ 134 | config_env.DATASET.EPISODES_DIR.replace("v1", args.version) 135 | 136 | config_env.freeze() 137 | env_configs.append(config_env) 138 | 139 | args_list.append(args) 140 | 141 | envs = VectorEnv( 142 | make_env_fn=make_env_fn, 143 | env_fn_args=tuple( 144 | tuple( 145 | zip(args_list, env_configs, range(args.num_processes)) 146 | ) 147 | ), 148 | ) 149 | 150 | return envs 151 | -------------------------------------------------------------------------------- /envs/habitat/configs/tasks/objectnav_gibson.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | TURN_ANGLE: 30 5 | TILT_ANGLE: 30 6 | ACTION_SPACE_CONFIG: "v1" 7 | AGENT_0: 8 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'SEMANTIC_SENSOR'] 9 | HEIGHT: 0.88 10 | RADIUS: 0.18 11 | HABITAT_SIM_V0: 12 | GPU_DEVICE_ID: 0 13 | ALLOW_SLIDING: True 14 | SEMANTIC_SENSOR: 15 | WIDTH: 640 16 | HEIGHT: 480 17 | HFOV: 79 18 | POSITION: [0, 0.88, 0] 19 | RGB_SENSOR: 20 | WIDTH: 640 21 | HEIGHT: 480 22 | HFOV: 79 23 | POSITION: [0, 0.88, 0] 24 | DEPTH_SENSOR: 25 | WIDTH: 640 26 | HEIGHT: 480 27 | HFOV: 79 28 | MIN_DEPTH: 0.5 29 | MAX_DEPTH: 5.0
30 | POSITION: [0, 0.88, 0] 31 | TASK: 32 | TYPE: ObjectNav-v1 33 | POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"] 34 | SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR'] 35 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL'] 36 | SUCCESS: 37 | SUCCESS_DISTANCE: 0.2 38 | 39 | DATASET: 40 | TYPE: PointNav-v1 41 | SPLIT: train 42 | DATA_PATH: "data/datasets/objectnav/gibson/v1/{split}/{split}.json.gz" 43 | EPISODES_DIR: "data/datasets/objectnav/gibson/v1/{split}/" 44 | SCENES_DIR: "data/scene_datasets/" 45 | -------------------------------------------------------------------------------- /envs/habitat/objectgoal_env.py: -------------------------------------------------------------------------------- 1 | import json 2 | import bz2 3 | import gzip 4 | import _pickle as cPickle 5 | import gym 6 | import numpy as np 7 | import quaternion 8 | import skimage.morphology 9 | import habitat 10 | 11 | from envs.utils.fmm_planner import FMMPlanner 12 | from constants import coco_categories 13 | import envs.utils.pose as pu 14 | 15 | 16 | class ObjectGoal_Env(habitat.RLEnv): 17 | """The Object Goal Navigation environment class. The class is responsible 18 | for loading the dataset, generating episodes, and computing evaluation 19 | metrics. 20 | """ 21 | 22 | def __init__(self, args, rank, config_env, dataset): 23 | self.args = args 24 | self.rank = rank 25 | 26 | super().__init__(config_env, dataset) 27 | 28 | # Loading dataset info file 29 | self.split = config_env.DATASET.SPLIT 30 | self.episodes_dir = config_env.DATASET.EPISODES_DIR.format( 31 | split=self.split) 32 | 33 | dataset_info_file = self.episodes_dir + \ 34 | "{split}_info.pbz2".format(split=self.split) 35 | with bz2.BZ2File(dataset_info_file, 'rb') as f: 36 | self.dataset_info = cPickle.load(f) 37 | 38 | # Specifying action and observation space 39 | self.action_space = gym.spaces.Discrete(3) 40 | 41 | self.observation_space = gym.spaces.Box(0, 255, 42 | (3, args.frame_height, 43 | args.frame_width), 44 | dtype='uint8') 45 | 46 | # Initializations 47 | self.episode_no = 0 48 | 49 | # Scene info 50 | self.last_scene_path = None 51 | self.scene_path = None 52 | self.scene_name = None 53 | 54 | # Episode Dataset info 55 | self.eps_data = None 56 | self.eps_data_idx = None 57 | self.gt_planner = None 58 | self.object_boundary = None 59 | self.goal_idx = None 60 | self.goal_name = None 61 | self.map_obj_origin = None 62 | self.starting_loc = None 63 | self.starting_distance = None 64 | 65 | # Episode tracking info 66 | self.curr_distance = None 67 | self.prev_distance = None 68 | self.timestep = None 69 | self.stopped = None 70 | self.path_length = None 71 | self.last_sim_location = None 72 | self.trajectory_states = [] 73 | self.info = {} 74 | self.info['distance_to_goal'] = None 75 | self.info['spl'] = None 76 | self.info['success'] = None 77 | 78 | def load_new_episode(self): 79 | """The function loads a fixed episode from the episode dataset. This 80 | function is used for evaluating a trained model on the val split. 
81 | """ 82 | 83 | args = self.args 84 | self.scene_path = self.habitat_env.sim.config.SCENE 85 | scene_name = self.scene_path.split("/")[-1].split(".")[0] 86 | 87 | if self.scene_path != self.last_scene_path: 88 | episodes_file = self.episodes_dir + \ 89 | "content/{}_episodes.json.gz".format(scene_name) 90 | 91 | print("Loading episodes from: {}".format(episodes_file)) 92 | with gzip.open(episodes_file, 'r') as f: 93 | self.eps_data = json.loads( 94 | f.read().decode('utf-8'))["episodes"] 95 | 96 | self.eps_data_idx = 0 97 | self.last_scene_path = self.scene_path 98 | 99 | # Load episode info 100 | episode = self.eps_data[self.eps_data_idx] 101 | self.eps_data_idx += 1 102 | self.eps_data_idx = self.eps_data_idx % len(self.eps_data) 103 | pos = episode["start_position"] 104 | rot = quaternion.from_float_array(episode["start_rotation"]) 105 | 106 | goal_name = episode["object_category"] 107 | goal_idx = episode["object_id"] 108 | floor_idx = episode["floor_id"] 109 | 110 | # Load scene info 111 | scene_info = self.dataset_info[scene_name] 112 | sem_map = scene_info[floor_idx]['sem_map'] 113 | map_obj_origin = scene_info[floor_idx]['origin'] 114 | 115 | # Setup ground truth planner 116 | object_boundary = args.success_dist 117 | map_resolution = args.map_resolution 118 | selem = skimage.morphology.disk(2) 119 | traversible = skimage.morphology.binary_dilation( 120 | sem_map[0], selem) != True 121 | traversible = 1 - traversible 122 | planner = FMMPlanner(traversible) 123 | selem = skimage.morphology.disk( 124 | int(object_boundary * 100. / map_resolution)) 125 | goal_map = skimage.morphology.binary_dilation( 126 | sem_map[goal_idx + 1], selem) != True 127 | goal_map = 1 - goal_map 128 | planner.set_multi_goal(goal_map) 129 | 130 | # Get starting loc in GT map coordinates 131 | x = -pos[2] 132 | y = -pos[0] 133 | min_x, min_y = map_obj_origin / 100.0 134 | map_loc = int((-y - min_y) * 20.), int((-x - min_x) * 20.) 135 | 136 | self.gt_planner = planner 137 | self.starting_loc = map_loc 138 | self.object_boundary = object_boundary 139 | self.goal_idx = goal_idx 140 | self.goal_name = goal_name 141 | self.map_obj_origin = map_obj_origin 142 | 143 | self.starting_distance = self.gt_planner.fmm_dist[self.starting_loc]\ 144 | / 20.0 + self.object_boundary 145 | self.prev_distance = self.starting_distance 146 | self._env.sim.set_agent_state(pos, rot) 147 | 148 | # The following two should match approximately 149 | # print(starting_loc) 150 | # print(self.sim_continuous_to_sim_map(self.get_sim_location())) 151 | 152 | obs = self._env.sim.get_observations_at(pos, rot) 153 | 154 | return obs 155 | 156 | def generate_new_episode(self): 157 | """The function generates a random valid episode. This function is used 158 | for training a model on the train split. 
159 | """ 160 | 161 | args = self.args 162 | 163 | self.scene_path = self.habitat_env.sim.config.SCENE 164 | scene_name = self.scene_path.split("/")[-1].split(".")[0] 165 | 166 | scene_info = self.dataset_info[scene_name] 167 | map_resolution = args.map_resolution 168 | 169 | floor_idx = np.random.randint(len(scene_info.keys())) 170 | floor_height = scene_info[floor_idx]['floor_height'] 171 | sem_map = scene_info[floor_idx]['sem_map'] 172 | map_obj_origin = scene_info[floor_idx]['origin'] 173 | 174 | cat_counts = sem_map.sum(2).sum(1) 175 | possible_cats = list(np.arange(6)) 176 | 177 | for i in range(6): 178 | if cat_counts[i + 1] == 0: 179 | possible_cats.remove(i) 180 | 181 | object_boundary = args.success_dist 182 | 183 | loc_found = False 184 | while not loc_found: 185 | if len(possible_cats) == 0: 186 | print("No valid objects for {}".format(floor_height)) 187 | eps = eps - 1 188 | continue 189 | 190 | goal_idx = np.random.choice(possible_cats) 191 | 192 | for key, value in coco_categories.items(): 193 | if value == goal_idx: 194 | goal_name = key 195 | 196 | selem = skimage.morphology.disk(2) 197 | traversible = skimage.morphology.binary_dilation( 198 | sem_map[0], selem) != True 199 | traversible = 1 - traversible 200 | 201 | planner = FMMPlanner(traversible) 202 | 203 | selem = skimage.morphology.disk( 204 | int(object_boundary * 100. / map_resolution)) 205 | goal_map = skimage.morphology.binary_dilation( 206 | sem_map[goal_idx + 1], selem) != True 207 | goal_map = 1 - goal_map 208 | 209 | planner.set_multi_goal(goal_map) 210 | 211 | m1 = sem_map[0] > 0 212 | m2 = planner.fmm_dist > (args.min_d - object_boundary) * 20.0 213 | m3 = planner.fmm_dist < (args.max_d - object_boundary) * 20.0 214 | 215 | possible_starting_locs = np.logical_and(m1, m2) 216 | possible_starting_locs = np.logical_and( 217 | possible_starting_locs, m3) * 1. 218 | if possible_starting_locs.sum() != 0: 219 | loc_found = True 220 | else: 221 | print("Invalid object: {} / {} / {}".format( 222 | scene_name, floor_height, goal_name)) 223 | possible_cats.remove(goal_idx) 224 | scene_info[floor_idx]["sem_map"][goal_idx + 1, :, :] = 0. 225 | self.dataset_info[scene_name][floor_idx][ 226 | "sem_map"][goal_idx + 1, :, :] = 0. 227 | 228 | loc_found = False 229 | while not loc_found: 230 | pos = self._env.sim.sample_navigable_point() 231 | x = -pos[2] 232 | y = -pos[0] 233 | min_x, min_y = map_obj_origin / 100.0 234 | map_loc = int((-y - min_y) * 20.), int((-x - min_x) * 20.) 
235 | if abs(pos[1] - floor_height) < args.floor_thr / 100.0 and \ 236 | possible_starting_locs[map_loc[0], map_loc[1]] == 1: 237 | loc_found = True 238 | 239 | agent_state = self._env.sim.get_agent_state(0) 240 | rotation = agent_state.rotation 241 | rvec = quaternion.as_rotation_vector(rotation) 242 | rvec[1] = np.random.rand() * 2 * np.pi 243 | rot = quaternion.from_rotation_vector(rvec) 244 | 245 | self.gt_planner = planner 246 | self.starting_loc = map_loc 247 | self.object_boundary = object_boundary 248 | self.goal_idx = goal_idx 249 | self.goal_name = goal_name 250 | self.map_obj_origin = map_obj_origin 251 | 252 | self.starting_distance = self.gt_planner.fmm_dist[self.starting_loc] \ 253 | / 20.0 + self.object_boundary 254 | self.prev_distance = self.starting_distance 255 | 256 | self._env.sim.set_agent_state(pos, rot) 257 | 258 | # The following two should match approximately 259 | # print(starting_loc) 260 | # print(self.sim_continuous_to_sim_map(self.get_sim_location())) 261 | 262 | obs = self._env.sim.get_observations_at(pos, rot) 263 | 264 | return obs 265 | 266 | def sim_map_to_sim_continuous(self, coords): 267 | """Converts ground-truth 2D Map coordinates to absolute Habitat 268 | simulator position and rotation. 269 | """ 270 | agent_state = self._env.sim.get_agent_state(0) 271 | y, x = coords 272 | min_x, min_y = self.map_obj_origin / 100.0 273 | 274 | cont_x = x / 20. + min_x 275 | cont_y = y / 20. + min_y 276 | agent_state.position[0] = cont_y 277 | agent_state.position[2] = cont_x 278 | 279 | rotation = agent_state.rotation 280 | rvec = quaternion.as_rotation_vector(rotation) 281 | 282 | if self.args.train_single_eps: 283 | rvec[1] = 0.0 284 | else: 285 | rvec[1] = np.random.rand() * 2 * np.pi 286 | rot = quaternion.from_rotation_vector(rvec) 287 | 288 | return agent_state.position, rot 289 | 290 | def sim_continuous_to_sim_map(self, sim_loc): 291 | """Converts absolute Habitat simulator pose to ground-truth 2D Map 292 | coordinates. 293 | """ 294 | x, y, o = sim_loc 295 | min_x, min_y = self.map_obj_origin / 100.0 296 | x, y = int((-x - min_x) * 20.), int((-y - min_y) * 20.) 297 | 298 | o = np.rad2deg(o) + 180.0 299 | return y, x, o 300 | 301 | def reset(self): 302 | """Resets the environment to a new episode. 303 | 304 | Returns: 305 | obs (ndarray): RGBD observations (4 x H x W) 306 | info (dict): contains timestep, pose, goal category and 307 | evaluation metric info 308 | """ 309 | args = self.args 310 | new_scene = self.episode_no % args.num_train_episodes == 0 311 | 312 | self.episode_no += 1 313 | 314 | # Initializations 315 | self.timestep = 0 316 | self.stopped = False 317 | self.path_length = 1e-5 318 | self.trajectory_states = [] 319 | 320 | if new_scene: 321 | obs = super().reset() 322 | self.scene_name = self.habitat_env.sim.config.SCENE 323 | print("Changing scene: {}/{}".format(self.rank, self.scene_name)) 324 | 325 | self.scene_path = self.habitat_env.sim.config.SCENE 326 | 327 | if self.split == "val": 328 | obs = self.load_new_episode() 329 | else: 330 | obs = self.generate_new_episode() 331 | 332 | rgb = obs['rgb'].astype(np.uint8) 333 | depth = obs['depth'] 334 | state = np.concatenate((rgb, depth), axis=2).transpose(2, 0, 1) 335 | self.last_sim_location = self.get_sim_location() 336 | 337 | # Set info 338 | self.info['time'] = self.timestep 339 | self.info['sensor_pose'] = [0., 0., 0.] 
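# sensor_pose holds the egocentric (dx, dy, do) pose change; it is zeroed at the start of an episode and updated with the actual pose change in step() via get_pose_change()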
340 | self.info['goal_cat_id'] = self.goal_idx 341 | self.info['goal_name'] = self.goal_name 342 | 343 | return state, self.info 344 | 345 | def step(self, action): 346 | """Function to take an action in the environment. 347 | 348 | Args: 349 | action (dict): 350 | dict with following keys: 351 | 'action' (int): 0: stop, 1: forward, 2: left, 3: right 352 | 353 | Returns: 354 | obs (ndarray): RGBD observations (4 x H x W) 355 | reward (float): amount of reward returned after previous action 356 | done (bool): whether the episode has ended 357 | info (dict): contains timestep, pose, goal category and 358 | evaluation metric info 359 | """ 360 | action = action["action"] 361 | if action == 0: 362 | self.stopped = True 363 | # Not sending stop to simulator, resetting manually 364 | action = 3 365 | 366 | obs, rew, done, _ = super().step(action) 367 | 368 | # Get pose change 369 | dx, dy, do = self.get_pose_change() 370 | self.info['sensor_pose'] = [dx, dy, do] 371 | self.path_length += pu.get_l2_distance(0, dx, 0, dy) 372 | 373 | spl, success, dist = 0., 0., 0. 374 | if done: 375 | spl, success, dist = self.get_metrics() 376 | self.info['distance_to_goal'] = dist 377 | self.info['spl'] = spl 378 | self.info['success'] = success 379 | 380 | rgb = obs['rgb'].astype(np.uint8) 381 | depth = obs['depth'] 382 | state = np.concatenate((rgb, depth), axis=2).transpose(2, 0, 1) 383 | 384 | self.timestep += 1 385 | self.info['time'] = self.timestep 386 | 387 | return state, rew, done, self.info 388 | 389 | def get_reward_range(self): 390 | """This function is not used, Habitat-RLEnv requires this function""" 391 | return (0., 1.0) 392 | 393 | def get_reward(self, observations): 394 | curr_loc = self.sim_continuous_to_sim_map(self.get_sim_location()) 395 | self.curr_distance = self.gt_planner.fmm_dist[curr_loc[0], 396 | curr_loc[1]] / 20.0 397 | 398 | reward = (self.prev_distance - self.curr_distance) * \ 399 | self.args.reward_coeff 400 | 401 | self.prev_distance = self.curr_distance 402 | return reward 403 | 404 | def get_metrics(self): 405 | """This function computes evaluation metrics for the Object Goal task 406 | 407 | Returns: 408 | spl (float): Success weighted by Path Length 409 | (See https://arxiv.org/pdf/1807.06757.pdf) 410 | success (int): 0: Failure, 1: Successful 411 | dist (float): Distance to Success (DTS), distance of the agent 412 | from the success threshold boundary in meters. 
413 | (See https://arxiv.org/pdf/2007.00643.pdf) 414 | """ 415 | curr_loc = self.sim_continuous_to_sim_map(self.get_sim_location()) 416 | dist = self.gt_planner.fmm_dist[curr_loc[0], curr_loc[1]] / 20.0 417 | if dist == 0.0: 418 | success = 1 419 | else: 420 | success = 0 421 | spl = min(success * self.starting_distance / self.path_length, 1) 422 | return spl, success, dist 423 | 424 | def get_done(self, observations): 425 | if self.info['time'] >= self.args.max_episode_length - 1: 426 | done = True 427 | elif self.stopped: 428 | done = True 429 | else: 430 | done = False 431 | return done 432 | 433 | def get_info(self, observations): 434 | """This function is not used, Habitat-RLEnv requires this function""" 435 | info = {} 436 | return info 437 | 438 | def get_spaces(self): 439 | """Returns observation and action spaces for the ObjectGoal task.""" 440 | return self.observation_space, self.action_space 441 | 442 | def get_sim_location(self): 443 | """Returns x, y, o pose of the agent in the Habitat simulator.""" 444 | 445 | agent_state = super().habitat_env.sim.get_agent_state(0) 446 | x = -agent_state.position[2] 447 | y = -agent_state.position[0] 448 | axis = quaternion.as_euler_angles(agent_state.rotation)[0] 449 | if (axis % (2 * np.pi)) < 0.1 or (axis % 450 | (2 * np.pi)) > 2 * np.pi - 0.1: 451 | o = quaternion.as_euler_angles(agent_state.rotation)[1] 452 | else: 453 | o = 2 * np.pi - quaternion.as_euler_angles(agent_state.rotation)[1] 454 | if o > np.pi: 455 | o -= 2 * np.pi 456 | return x, y, o 457 | 458 | def get_pose_change(self): 459 | """Returns dx, dy, do pose change of the agent relative to the last 460 | timestep.""" 461 | curr_sim_pose = self.get_sim_location() 462 | dx, dy, do = pu.get_rel_pose_change( 463 | curr_sim_pose, self.last_sim_location) 464 | self.last_sim_location = curr_sim_pose 465 | return dx, dy, do 466 | -------------------------------------------------------------------------------- /envs/habitat/utils/vector_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from multiprocessing.connection import Connection 8 | from multiprocessing.context import BaseContext 9 | from queue import Queue 10 | from threading import Thread 11 | from typing import ( 12 | Any, 13 | Callable, 14 | Dict, 15 | List, 16 | Optional, 17 | Sequence, 18 | Set, 19 | Tuple, 20 | Union, 21 | ) 22 | 23 | import gym 24 | import numpy as np 25 | from gym.spaces.dict_space import Dict as SpaceDict 26 | 27 | import habitat 28 | from habitat.config import Config 29 | from habitat.core.env import Env, Observations, RLEnv 30 | from habitat.core.logging import logger 31 | from habitat.core.utils import tile_images 32 | 33 | try: 34 | # Use torch.multiprocessing if we can. 
35 | # We have yet to find a reason to not use it and 36 | # you are required to use it when sending a torch.Tensor 37 | # between processes 38 | import torch.multiprocessing as mp 39 | except ImportError: 40 | import multiprocessing as mp 41 | 42 | STEP_COMMAND = "step" 43 | RESET_COMMAND = "reset" 44 | RENDER_COMMAND = "render" 45 | CLOSE_COMMAND = "close" 46 | OBSERVATION_SPACE_COMMAND = "observation_space" 47 | ACTION_SPACE_COMMAND = "action_space" 48 | CALL_COMMAND = "call" 49 | EPISODE_COMMAND = "current_episode" 50 | PLAN_ACT_AND_PREPROCESS = "plan_act_and_preprocess" 51 | COUNT_EPISODES_COMMAND = "count_episodes" 52 | EPISODE_OVER = "episode_over" 53 | GET_METRICS = "get_metrics" 54 | 55 | 56 | def _make_env_fn( 57 | config: Config, dataset: Optional[habitat.Dataset] = None, rank: int = 0 58 | ) -> Env: 59 | """Constructor for default habitat `env.Env`. 60 | 61 | :param config: configuration for environment. 62 | :param dataset: dataset for environment. 63 | :param rank: rank for setting seed of environment 64 | :return: `env.Env` / `env.RLEnv` object 65 | """ 66 | habitat_env = Env(config=config, dataset=dataset) 67 | habitat_env.seed(config.SEED + rank) 68 | return habitat_env 69 | 70 | 71 | class VectorEnv: 72 | r"""Vectorized environment which creates multiple processes where each 73 | process runs its own environment. Main class for parallelization of 74 | training and evaluation. 75 | 76 | 77 | All the environments are synchronized on step and reset methods. 78 | """ 79 | 80 | observation_spaces: List[SpaceDict] 81 | action_spaces: List[SpaceDict] 82 | _workers: List[Union[mp.Process, Thread]] 83 | _is_waiting: bool 84 | _num_envs: int 85 | _auto_reset_done: bool 86 | _mp_ctx: BaseContext 87 | _connection_read_fns: List[Callable[[], Any]] 88 | _connection_write_fns: List[Callable[[Any], None]] 89 | 90 | def __init__( 91 | self, 92 | make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn, 93 | env_fn_args: Sequence[Tuple] = None, 94 | auto_reset_done: bool = True, 95 | multiprocessing_start_method: str = "forkserver", 96 | ) -> None: 97 | """.. 98 | 99 | :param make_env_fn: function which creates a single environment. An 100 | environment can be of type `env.Env` or `env.RLEnv` 101 | :param env_fn_args: tuple of tuple of args to pass to the 102 | `_make_env_fn`. 103 | :param auto_reset_done: automatically reset the environment when 104 | done. This functionality is provided for seamless training 105 | of vectorized environments. 106 | :param multiprocessing_start_method: the multiprocessing method used to 107 | spawn worker processes. Valid methods are 108 | :py:`{'spawn', 'forkserver', 'fork'}`; :py:`'forkserver'` is the 109 | recommended method as it works well with CUDA. If :py:`'fork'` is 110 | used, the subproccess must be started before any other GPU useage. 111 | """ 112 | self._is_waiting = False 113 | self._is_closed = True 114 | 115 | assert ( 116 | env_fn_args is not None and len(env_fn_args) > 0 117 | ), "number of environments to be created should be greater than 0" 118 | 119 | self._num_envs = len(env_fn_args) 120 | 121 | assert multiprocessing_start_method in self._valid_start_methods, ( 122 | "multiprocessing_start_method must be one of {}. 
Got '{}'" 123 | ).format(self._valid_start_methods, multiprocessing_start_method) 124 | self._auto_reset_done = auto_reset_done 125 | self._mp_ctx = mp.get_context(multiprocessing_start_method) 126 | self._workers = [] 127 | ( 128 | self._connection_read_fns, 129 | self._connection_write_fns, 130 | ) = self._spawn_workers( # noqa 131 | env_fn_args, make_env_fn 132 | ) 133 | 134 | self._is_closed = False 135 | 136 | for write_fn in self._connection_write_fns: 137 | write_fn((OBSERVATION_SPACE_COMMAND, None)) 138 | self.observation_spaces = [ 139 | read_fn() for read_fn in self._connection_read_fns 140 | ] 141 | for write_fn in self._connection_write_fns: 142 | write_fn((ACTION_SPACE_COMMAND, None)) 143 | self.action_spaces = [ 144 | read_fn() for read_fn in self._connection_read_fns 145 | ] 146 | self.observation_space = self.observation_spaces[0] 147 | self.action_space = self.action_spaces[0] 148 | self._paused = [] 149 | 150 | @property 151 | def num_envs(self): 152 | r"""number of individual environments. 153 | """ 154 | return self._num_envs - len(self._paused) 155 | 156 | @staticmethod 157 | def _worker_env( 158 | connection_read_fn: Callable, 159 | connection_write_fn: Callable, 160 | env_fn: Callable, 161 | env_fn_args: Tuple[Any], 162 | auto_reset_done: bool, 163 | child_pipe: Optional[Connection] = None, 164 | parent_pipe: Optional[Connection] = None, 165 | ) -> None: 166 | r"""process worker for creating and interacting with the environment. 167 | """ 168 | env = env_fn(*env_fn_args) 169 | if parent_pipe is not None: 170 | parent_pipe.close() 171 | try: 172 | command, data = connection_read_fn() 173 | while command != CLOSE_COMMAND: 174 | if command == STEP_COMMAND: 175 | # different step methods for habitat.RLEnv and habitat.Env 176 | if isinstance(env, habitat.RLEnv) or isinstance( 177 | env, gym.Env 178 | ): 179 | # habitat.RLEnv 180 | observations, reward, done, info = env.step(**data) 181 | if auto_reset_done and done: 182 | observations, info = env.reset() 183 | connection_write_fn((observations, reward, done, info)) 184 | elif isinstance(env, habitat.Env): 185 | # habitat.Env 186 | observations = env.step(**data) 187 | if auto_reset_done and env.episode_over: 188 | observations = env.reset() 189 | connection_write_fn(observations) 190 | else: 191 | raise NotImplementedError 192 | 193 | elif command == RESET_COMMAND: 194 | observations = env.reset() 195 | connection_write_fn(observations) 196 | 197 | elif command == RENDER_COMMAND: 198 | connection_write_fn(env.render(*data[0], **data[1])) 199 | 200 | elif ( 201 | command == OBSERVATION_SPACE_COMMAND 202 | or command == ACTION_SPACE_COMMAND 203 | ): 204 | if isinstance(command, str): 205 | connection_write_fn(getattr(env, command)) 206 | 207 | elif command == CALL_COMMAND: 208 | function_name, function_args = data 209 | if function_args is None or len(function_args) == 0: 210 | result = getattr(env, function_name)() 211 | else: 212 | result = getattr(env, function_name)(**function_args) 213 | connection_write_fn(result) 214 | 215 | # TODO: update CALL_COMMAND for getting attribute like this 216 | elif command == EPISODE_COMMAND: 217 | connection_write_fn(env.current_episode) 218 | 219 | elif command == PLAN_ACT_AND_PREPROCESS: 220 | observations, reward, done, info = \ 221 | env.plan_act_and_preprocess(data) 222 | if auto_reset_done and done: 223 | observations, info = env.reset() 224 | connection_write_fn((observations, reward, done, info)) 225 | 226 | elif command == COUNT_EPISODES_COMMAND: 227 | 
connection_write_fn(len(env.episodes)) 228 | 229 | elif command == EPISODE_OVER: 230 | connection_write_fn(env.episode_over) 231 | 232 | elif command == GET_METRICS: 233 | result = env.get_metrics() 234 | connection_write_fn(result) 235 | 236 | else: 237 | raise NotImplementedError 238 | 239 | command, data = connection_read_fn() 240 | 241 | if child_pipe is not None: 242 | child_pipe.close() 243 | except KeyboardInterrupt: 244 | logger.info("Worker KeyboardInterrupt") 245 | finally: 246 | env.close() 247 | 248 | def _spawn_workers( 249 | self, 250 | env_fn_args: Sequence[Tuple], 251 | make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn, 252 | ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]: 253 | parent_connections, worker_connections = zip( 254 | *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_envs)] 255 | ) 256 | self._workers = [] 257 | for worker_conn, parent_conn, env_args in zip( 258 | worker_connections, parent_connections, env_fn_args 259 | ): 260 | ps = self._mp_ctx.Process( 261 | target=self._worker_env, 262 | args=( 263 | worker_conn.recv, 264 | worker_conn.send, 265 | make_env_fn, 266 | env_args, 267 | self._auto_reset_done, 268 | worker_conn, 269 | parent_conn, 270 | ), 271 | ) 272 | self._workers.append(ps) 273 | ps.daemon = True 274 | ps.start() 275 | worker_conn.close() 276 | return ( 277 | [p.recv for p in parent_connections], 278 | [p.send for p in parent_connections], 279 | ) 280 | 281 | def current_episodes(self): 282 | self._is_waiting = True 283 | for write_fn in self._connection_write_fns: 284 | write_fn((EPISODE_COMMAND, None)) 285 | results = [] 286 | for read_fn in self._connection_read_fns: 287 | results.append(read_fn()) 288 | self._is_waiting = False 289 | return results 290 | 291 | def count_episodes(self): 292 | self._is_waiting = True 293 | for write_fn in self._connection_write_fns: 294 | write_fn((COUNT_EPISODES_COMMAND, None)) 295 | results = [] 296 | for read_fn in self._connection_read_fns: 297 | results.append(read_fn()) 298 | self._is_waiting = False 299 | return results 300 | 301 | def episode_over(self): 302 | self._is_waiting = True 303 | for write_fn in self._connection_write_fns: 304 | write_fn((EPISODE_OVER, None)) 305 | results = [] 306 | for read_fn in self._connection_read_fns: 307 | results.append(read_fn()) 308 | self._is_waiting = False 309 | return results 310 | 311 | def get_metrics(self): 312 | self._is_waiting = True 313 | for write_fn in self._connection_write_fns: 314 | write_fn((GET_METRICS, None)) 315 | results = [] 316 | for read_fn in self._connection_read_fns: 317 | results.append(read_fn()) 318 | self._is_waiting = False 319 | return results 320 | 321 | def reset(self): 322 | r"""Reset all the vectorized environments 323 | 324 | :return: list of outputs from the reset method of envs. 325 | """ 326 | self._is_waiting = True 327 | for write_fn in self._connection_write_fns: 328 | write_fn((RESET_COMMAND, None)) 329 | results = [] 330 | for read_fn in self._connection_read_fns: 331 | results.append(read_fn()) 332 | obs, infos = zip(*results) 333 | 334 | self._is_waiting = False 335 | return np.stack(obs), infos 336 | 337 | def reset_at(self, index_env: int): 338 | r"""Reset in the index_env environment in the vector. 339 | 340 | :param index_env: index of the environment to be reset 341 | :return: list containing the output of reset method of indexed env. 
342 | """ 343 | self._is_waiting = True 344 | self._connection_write_fns[index_env]((RESET_COMMAND, None)) 345 | results = [self._connection_read_fns[index_env]()] 346 | self._is_waiting = False 347 | return results 348 | 349 | def step_at(self, index_env: int, action: Dict[str, Any]): 350 | r"""Step in the index_env environment in the vector. 351 | 352 | :param index_env: index of the environment to be stepped into 353 | :param action: action to be taken 354 | :return: list containing the output of step method of indexed env. 355 | """ 356 | self._is_waiting = True 357 | self._connection_write_fns[index_env]((STEP_COMMAND, action)) 358 | results = [self._connection_read_fns[index_env]()] 359 | self._is_waiting = False 360 | return results 361 | 362 | def step_async(self, data: List[Union[int, str, Dict[str, Any]]]) -> None: 363 | r"""Asynchronously step in the environments. 364 | 365 | :param data: list of size _num_envs containing keyword arguments to 366 | pass to `step` method for each Environment. For example, 367 | :py:`[{"action": "TURN_LEFT", "action_args": {...}}, ...]`. 368 | """ 369 | # Backward compatibility 370 | if isinstance(data[0], (int, np.integer, str)): 371 | data = [{"action": {"action": action}} for action in data] 372 | 373 | self._is_waiting = True 374 | for write_fn, args in zip(self._connection_write_fns, data): 375 | write_fn((STEP_COMMAND, args)) 376 | 377 | def step_wait(self) -> List[Observations]: 378 | r"""Wait until all the asynchronized environments have synchronized. 379 | """ 380 | results = [] 381 | for read_fn in self._connection_read_fns: 382 | results.append(read_fn()) 383 | self._is_waiting = False 384 | obs, rews, dones, infos = zip(*results) 385 | return np.stack(obs), np.stack(rews), np.stack(dones), infos 386 | 387 | def step(self, data: List[Union[int, str, Dict[str, Any]]]) -> List[Any]: 388 | r"""Perform actions in the vectorized environments. 389 | 390 | :param data: list of size _num_envs containing keyword arguments to 391 | pass to `step` method for each Environment. For example, 392 | :py:`[{"action": "TURN_LEFT", "action_args": {...}}, ...]`. 393 | :return: list of outputs from the step method of envs. 394 | """ 395 | self.step_async(data) 396 | return self.step_wait() 397 | 398 | def close(self) -> None: 399 | if self._is_closed: 400 | return 401 | 402 | if self._is_waiting: 403 | for read_fn in self._connection_read_fns: 404 | read_fn() 405 | 406 | for write_fn in self._connection_write_fns: 407 | write_fn((CLOSE_COMMAND, None)) 408 | 409 | for _, _, write_fn, _ in self._paused: 410 | write_fn((CLOSE_COMMAND, None)) 411 | 412 | for process in self._workers: 413 | process.join() 414 | 415 | for _, _, _, process in self._paused: 416 | process.join() 417 | 418 | self._is_closed = True 419 | 420 | def pause_at(self, index: int) -> None: 421 | r"""Pauses computation on this env without destroying the env. 422 | 423 | :param index: which env to pause. All indexes after this one will be 424 | shifted down by one. 425 | 426 | This is useful for not needing to call steps on all environments when 427 | only some are active (for example during the last episodes of running 428 | eval episodes). 
429 | """ 430 | if self._is_waiting: 431 | for read_fn in self._connection_read_fns: 432 | read_fn() 433 | read_fn = self._connection_read_fns.pop(index) 434 | write_fn = self._connection_write_fns.pop(index) 435 | worker = self._workers.pop(index) 436 | self._paused.append((index, read_fn, write_fn, worker)) 437 | 438 | def resume_all(self) -> None: 439 | r"""Resumes any paused envs. 440 | """ 441 | for index, read_fn, write_fn, worker in reversed(self._paused): 442 | self._connection_read_fns.insert(index, read_fn) 443 | self._connection_write_fns.insert(index, write_fn) 444 | self._workers.insert(index, worker) 445 | self._paused = [] 446 | 447 | def call_at( 448 | self, 449 | index: int, 450 | function_name: str, 451 | function_args: Optional[Dict[str, Any]] = None, 452 | ) -> Any: 453 | r"""Calls a function (which is passed by name) on the selected env and 454 | returns the result. 455 | 456 | :param index: which env to call the function on. 457 | :param function_name: the name of the function to call on the env. 458 | :param function_args: optional function args. 459 | :return: result of calling the function. 460 | """ 461 | self._is_waiting = True 462 | self._connection_write_fns[index]( 463 | (CALL_COMMAND, (function_name, function_args)) 464 | ) 465 | result = self._connection_read_fns[index]() 466 | self._is_waiting = False 467 | return result 468 | 469 | def call( 470 | self, 471 | function_names: List[str], 472 | function_args_list: Optional[List[Any]] = None, 473 | ) -> List[Any]: 474 | r"""Calls a list of functions (which are passed by name) on the 475 | corresponding env (by index). 476 | 477 | :param function_names: the name of the functions to call on the envs. 478 | :param function_args_list: list of function args for each function. If 479 | provided, :py:`len(function_args_list)` should be as long as 480 | :py:`len(function_names)`. 481 | :return: result of calling the function. 482 | """ 483 | self._is_waiting = True 484 | if function_args_list is None: 485 | function_args_list = [None] * len(function_names) 486 | assert len(function_names) == len(function_args_list) 487 | func_args = zip(function_names, function_args_list) 488 | for write_fn, func_args_on in zip( 489 | self._connection_write_fns, func_args 490 | ): 491 | write_fn((CALL_COMMAND, func_args_on)) 492 | results = [] 493 | for read_fn in self._connection_read_fns: 494 | results.append(read_fn()) 495 | self._is_waiting = False 496 | return results 497 | 498 | def render( 499 | self, mode: str = "human", *args, **kwargs 500 | ) -> Union[np.ndarray, None]: 501 | r"""Render observations from all environments in a tiled image. 
502 | """ 503 | for write_fn in self._connection_write_fns: 504 | write_fn((RENDER_COMMAND, (args, {"mode": "rgb", **kwargs}))) 505 | images = [read_fn() for read_fn in self._connection_read_fns] 506 | tile = tile_images(images) 507 | if mode == "human": 508 | from habitat.core.utils import try_cv2_import 509 | 510 | cv2 = try_cv2_import() 511 | 512 | cv2.imshow("vecenv", tile[:, :, ::-1]) 513 | cv2.waitKey(1) 514 | return None 515 | elif mode == "rgb_array": 516 | return tile 517 | else: 518 | raise NotImplementedError 519 | 520 | def plan_act_and_preprocess(self, inputs): 521 | self._assert_not_closed() 522 | self._is_waiting = True 523 | for e, write_fn in enumerate(self._connection_write_fns): 524 | write_fn((PLAN_ACT_AND_PREPROCESS, inputs[e])) 525 | results = [] 526 | for read_fn in self._connection_read_fns: 527 | results.append(read_fn()) 528 | obs, rews, dones, infos = zip(*results) 529 | self._is_waiting = False 530 | return np.stack(obs), np.stack(rews), np.stack(dones), infos 531 | 532 | def _assert_not_closed(self): 533 | assert not self._is_closed, "Trying to operate on a SubprocVecEnv after calling close()" 534 | 535 | @property 536 | def _valid_start_methods(self) -> Set[str]: 537 | return {"forkserver", "spawn", "fork"} 538 | 539 | def __del__(self): 540 | self.close() 541 | 542 | def __enter__(self): 543 | return self 544 | 545 | def __exit__(self, exc_type, exc_val, exc_tb): 546 | self.close() 547 | 548 | 549 | class ThreadedVectorEnv(VectorEnv): 550 | r"""Provides same functionality as `VectorEnv`, the only difference is it 551 | runs in a multi-thread setup inside a single process. 552 | 553 | `VectorEnv` runs in a multi-proc setup. This makes it much easier to debug 554 | when using `VectorEnv` because you can actually put break points in the 555 | environment methods. It should not be used for best performance. 556 | """ 557 | 558 | def _spawn_workers( 559 | self, 560 | env_fn_args: Sequence[Tuple], 561 | make_env_fn: Callable[..., Env] = _make_env_fn, 562 | ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]: 563 | parent_read_queues, parent_write_queues = zip( 564 | *[(Queue(), Queue()) for _ in range(self._num_envs)] 565 | ) 566 | self._workers = [] 567 | for parent_read_queue, parent_write_queue, env_args in zip( 568 | parent_read_queues, parent_write_queues, env_fn_args 569 | ): 570 | thread = Thread( 571 | target=self._worker_env, 572 | args=( 573 | parent_write_queue.get, 574 | parent_read_queue.put, 575 | make_env_fn, 576 | env_args, 577 | self._auto_reset_done, 578 | ), 579 | ) 580 | self._workers.append(thread) 581 | thread.daemon = True 582 | thread.start() 583 | return ( 584 | [q.get for q in parent_read_queues], 585 | [q.put for q in parent_write_queues], 586 | ) 587 | -------------------------------------------------------------------------------- /envs/utils/depth_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utilities for processing depth images. 17 | """ 18 | from argparse import Namespace 19 | 20 | import itertools 21 | import numpy as np 22 | import torch 23 | 24 | import envs.utils.rotation_utils as ru 25 | 26 | 27 | def get_camera_matrix(width, height, fov): 28 | """Returns a camera matrix from image size and fov.""" 29 | xc = (width - 1.) / 2. 30 | zc = (height - 1.) / 2. 31 | f = (width / 2.) / np.tan(np.deg2rad(fov / 2.)) 32 | camera_matrix = {'xc': xc, 'zc': zc, 'f': f} 33 | camera_matrix = Namespace(**camera_matrix) 34 | return camera_matrix 35 | 36 | 37 | def get_point_cloud_from_z(Y, camera_matrix, scale=1): 38 | """Projects the depth image Y into a 3D point cloud. 39 | Inputs: 40 | Y is ...xHxW 41 | camera_matrix 42 | Outputs: 43 | X is positive going right 44 | Y is positive into the image 45 | Z is positive up in the image 46 | XYZ is ...xHxWx3 47 | """ 48 | x, z = np.meshgrid(np.arange(Y.shape[-1]), 49 | np.arange(Y.shape[-2] - 1, -1, -1)) 50 | for _ in range(Y.ndim - 2): 51 | x = np.expand_dims(x, axis=0) 52 | z = np.expand_dims(z, axis=0) 53 | X = (x[::scale, ::scale] - camera_matrix.xc) * \ 54 | Y[::scale, ::scale] / camera_matrix.f 55 | Z = (z[::scale, ::scale] - camera_matrix.zc) * \ 56 | Y[::scale, ::scale] / camera_matrix.f 57 | XYZ = np.concatenate((X[..., np.newaxis], 58 | Y[::scale, ::scale][..., np.newaxis], 59 | Z[..., np.newaxis]), axis=X.ndim) 60 | return XYZ 61 | 62 | 63 | def transform_camera_view(XYZ, sensor_height, camera_elevation_degree): 64 | """ 65 | Transforms the point cloud into geocentric frame to account for 66 | camera elevation and angle 67 | Input: 68 | XYZ : ...x3 69 | sensor_height : height of the sensor 70 | camera_elevation_degree : camera elevation to rectify. 71 | Output: 72 | XYZ : ...x3 73 | """ 74 | R = ru.get_r_matrix( 75 | [1., 0., 0.], angle=np.deg2rad(camera_elevation_degree)) 76 | XYZ = np.matmul(XYZ.reshape(-1, 3), R.T).reshape(XYZ.shape) 77 | XYZ[..., 2] = XYZ[..., 2] + sensor_height 78 | return XYZ 79 | 80 | 81 | def transform_pose(XYZ, current_pose): 82 | """ 83 | Transforms the point cloud into geocentric frame to account for 84 | camera position 85 | Input: 86 | XYZ : ...x3 87 | current_pose : camera position (x, y, theta (radians)) 88 | Output: 89 | XYZ : ...x3 90 | """ 91 | R = ru.get_r_matrix([0., 0., 1.], angle=current_pose[2] - np.pi / 2.) 92 | XYZ = np.matmul(XYZ.reshape(-1, 3), R.T).reshape(XYZ.shape) 93 | XYZ[:, :, 0] = XYZ[:, :, 0] + current_pose[0] 94 | XYZ[:, :, 1] = XYZ[:, :, 1] + current_pose[1] 95 | return XYZ 96 | 97 | 98 | def bin_points(XYZ_cms, map_size, z_bins, xy_resolution): 99 | """Bins points into xy-z bins 100 | XYZ_cms is ... x H x W x3 101 | Outputs is ... 
x map_size x map_size x (len(z_bins)+1) 102 | """ 103 | sh = XYZ_cms.shape 104 | XYZ_cms = XYZ_cms.reshape([-1, sh[-3], sh[-2], sh[-1]]) 105 | n_z_bins = len(z_bins) + 1 106 | counts = [] 107 | for XYZ_cm in XYZ_cms: 108 | isnotnan = np.logical_not(np.isnan(XYZ_cm[:, :, 0])) 109 | X_bin = np.round(XYZ_cm[:, :, 0] / xy_resolution).astype(np.int32) 110 | Y_bin = np.round(XYZ_cm[:, :, 1] / xy_resolution).astype(np.int32) 111 | Z_bin = np.digitize(XYZ_cm[:, :, 2], bins=z_bins).astype(np.int32) 112 | 113 | isvalid = np.array([X_bin >= 0, X_bin < map_size, Y_bin >= 0, 114 | Y_bin < map_size, 115 | Z_bin >= 0, Z_bin < n_z_bins, isnotnan]) 116 | isvalid = np.all(isvalid, axis=0) 117 | 118 | ind = (Y_bin * map_size + X_bin) * n_z_bins + Z_bin 119 | ind[np.logical_not(isvalid)] = 0 120 | count = np.bincount(ind.ravel(), isvalid.ravel().astype(np.int32), 121 | minlength=map_size * map_size * n_z_bins) 122 | counts = np.reshape(count, [map_size, map_size, n_z_bins]) 123 | 124 | counts = counts.reshape(list(sh[:-3]) + [map_size, map_size, n_z_bins]) 125 | 126 | return counts 127 | 128 | 129 | def get_point_cloud_from_z_t(Y_t, camera_matrix, device, scale=1): 130 | """Projects the depth image Y into a 3D point cloud. 131 | Inputs: 132 | Y is ...xHxW 133 | camera_matrix 134 | Outputs: 135 | X is positive going right 136 | Y is positive into the image 137 | Z is positive up in the image 138 | XYZ is ...xHxWx3 139 | """ 140 | grid_x, grid_z = torch.meshgrid(torch.arange(Y_t.shape[-1]), 141 | torch.arange(Y_t.shape[-2] - 1, -1, -1)) 142 | grid_x = grid_x.transpose(1, 0).to(device) 143 | grid_z = grid_z.transpose(1, 0).to(device) 144 | grid_x = grid_x.unsqueeze(0).expand(Y_t.size()) 145 | grid_z = grid_z.unsqueeze(0).expand(Y_t.size()) 146 | 147 | X_t = (grid_x[:, ::scale, ::scale] - camera_matrix.xc) * \ 148 | Y_t[:, ::scale, ::scale] / camera_matrix.f 149 | Z_t = (grid_z[:, ::scale, ::scale] - camera_matrix.zc) * \ 150 | Y_t[:, ::scale, ::scale] / camera_matrix.f 151 | 152 | XYZ = torch.stack( 153 | (X_t, Y_t[:, ::scale, ::scale], Z_t), dim=len(Y_t.size())) 154 | 155 | return XYZ 156 | 157 | 158 | def transform_camera_view_t( 159 | XYZ, sensor_height, camera_elevation_degree, device): 160 | """ 161 | Transforms the point cloud into geocentric frame to account for 162 | camera elevation and angle 163 | Input: 164 | XYZ : ...x3 165 | sensor_height : height of the sensor 166 | camera_elevation_degree : camera elevation to rectify. 167 | Output: 168 | XYZ : ...x3 169 | """ 170 | R = ru.get_r_matrix( 171 | [1., 0., 0.], angle=np.deg2rad(camera_elevation_degree)) 172 | XYZ = torch.matmul(XYZ.reshape(-1, 3), 173 | torch.from_numpy(R).float().transpose(1, 0).to(device) 174 | ).reshape(XYZ.shape) 175 | XYZ[..., 2] = XYZ[..., 2] + sensor_height 176 | return XYZ 177 | 178 | 179 | def transform_pose_t(XYZ, current_pose, device): 180 | """ 181 | Transforms the point cloud into geocentric frame to account for 182 | camera position 183 | Input: 184 | XYZ : ...x3 185 | current_pose : camera position (x, y, theta (radians)) 186 | Output: 187 | XYZ : ...x3 188 | """ 189 | R = ru.get_r_matrix([0., 0., 1.], angle=current_pose[2] - np.pi / 2.) 190 | XYZ = torch.matmul(XYZ.reshape(-1, 3), 191 | torch.from_numpy(R).float().transpose(1, 0).to(device) 192 | ).reshape(XYZ.shape) 193 | XYZ[..., 0] += current_pose[0] 194 | XYZ[..., 1] += current_pose[1] 195 | return XYZ 196 | 197 | 198 | def splat_feat_nd(init_grid, feat, coords): 199 | """ 200 | Args: 201 | init_grid: B X nF X W X H X D X .. 
202 | feat: B X nF X nPt 203 | coords: B X nDims X nPt in [-1, 1] 204 | Returns: 205 | grid: B X nF X W X H X D X .. 206 | """ 207 | wts_dim = [] 208 | pos_dim = [] 209 | grid_dims = init_grid.shape[2:] 210 | 211 | B = init_grid.shape[0] 212 | F = init_grid.shape[1] 213 | 214 | n_dims = len(grid_dims) 215 | 216 | grid_flat = init_grid.view(B, F, -1) 217 | 218 | for d in range(n_dims): 219 | pos = coords[:, [d], :] * grid_dims[d] / 2 + grid_dims[d] / 2 220 | pos_d = [] 221 | wts_d = [] 222 | 223 | for ix in [0, 1]: 224 | pos_ix = torch.floor(pos) + ix 225 | safe_ix = (pos_ix > 0) & (pos_ix < grid_dims[d]) 226 | safe_ix = safe_ix.type(pos.dtype) 227 | 228 | wts_ix = 1 - torch.abs(pos - pos_ix) 229 | 230 | wts_ix = wts_ix * safe_ix 231 | pos_ix = pos_ix * safe_ix 232 | 233 | pos_d.append(pos_ix) 234 | wts_d.append(wts_ix) 235 | 236 | pos_dim.append(pos_d) 237 | wts_dim.append(wts_d) 238 | 239 | l_ix = [[0, 1] for d in range(n_dims)] 240 | 241 | for ix_d in itertools.product(*l_ix): 242 | wts = torch.ones_like(wts_dim[0][0]) 243 | index = torch.zeros_like(wts_dim[0][0]) 244 | for d in range(n_dims): 245 | index = index * grid_dims[d] + pos_dim[d][ix_d[d]] 246 | wts = wts * wts_dim[d][ix_d[d]] 247 | 248 | index = index.long() 249 | grid_flat.scatter_add_(2, index.expand(-1, F, -1), feat * wts) 250 | grid_flat = torch.round(grid_flat) 251 | 252 | return grid_flat.view(init_grid.shape) 253 | -------------------------------------------------------------------------------- /envs/utils/fmm_planner.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import skfmm 4 | import skimage 5 | from numpy import ma 6 | 7 | 8 | def get_mask(sx, sy, scale, step_size): 9 | size = int(step_size // scale) * 2 + 1 10 | mask = np.zeros((size, size)) 11 | for i in range(size): 12 | for j in range(size): 13 | if ((i + 0.5) - (size // 2 + sx)) ** 2 + \ 14 | ((j + 0.5) - (size // 2 + sy)) ** 2 <= \ 15 | step_size ** 2 \ 16 | and ((i + 0.5) - (size // 2 + sx)) ** 2 + \ 17 | ((j + 0.5) - (size // 2 + sy)) ** 2 > \ 18 | (step_size - 1) ** 2: 19 | mask[i, j] = 1 20 | 21 | mask[size // 2, size // 2] = 1 22 | return mask 23 | 24 | 25 | def get_dist(sx, sy, scale, step_size): 26 | size = int(step_size // scale) * 2 + 1 27 | mask = np.zeros((size, size)) + 1e-10 28 | for i in range(size): 29 | for j in range(size): 30 | if ((i + 0.5) - (size // 2 + sx)) ** 2 + \ 31 | ((j + 0.5) - (size // 2 + sy)) ** 2 <= \ 32 | step_size ** 2: 33 | mask[i, j] = max(5, 34 | (((i + 0.5) - (size // 2 + sx)) ** 2 + 35 | ((j + 0.5) - (size // 2 + sy)) ** 2) ** 0.5) 36 | return mask 37 | 38 | 39 | class FMMPlanner(): 40 | def __init__(self, traversible, scale=1, step_size=5): 41 | self.scale = scale 42 | self.step_size = step_size 43 | if scale != 1.: 44 | self.traversible = cv2.resize(traversible, 45 | (traversible.shape[1] // scale, 46 | traversible.shape[0] // scale), 47 | interpolation=cv2.INTER_NEAREST) 48 | self.traversible = np.rint(self.traversible) 49 | else: 50 | self.traversible = traversible 51 | 52 | self.du = int(self.step_size / (self.scale * 1.)) 53 | self.fmm_dist = None 54 | 55 | def set_goal(self, goal, auto_improve=False): 56 | traversible_ma = ma.masked_values(self.traversible * 1, 0) 57 | goal_x, goal_y = int(goal[0] / (self.scale * 1.)), \ 58 | int(goal[1] / (self.scale * 1.)) 59 | 60 | if self.traversible[goal_x, goal_y] == 0. 
and auto_improve: 61 | goal_x, goal_y = self._find_nearest_goal([goal_x, goal_y]) 62 | 63 | traversible_ma[goal_x, goal_y] = 0 64 | dd = skfmm.distance(traversible_ma, dx=1) 65 | dd = ma.filled(dd, np.max(dd) + 1) 66 | self.fmm_dist = dd 67 | return 68 | 69 | def set_multi_goal(self, goal_map): 70 | traversible_ma = ma.masked_values(self.traversible * 1, 0) 71 | traversible_ma[goal_map == 1] = 0 72 | dd = skfmm.distance(traversible_ma, dx=1) 73 | dd = ma.filled(dd, np.max(dd) + 1) 74 | self.fmm_dist = dd 75 | return 76 | 77 | def get_short_term_goal(self, state): 78 | scale = self.scale * 1. 79 | state = [x / scale for x in state] 80 | dx, dy = state[0] - int(state[0]), state[1] - int(state[1]) 81 | mask = get_mask(dx, dy, scale, self.step_size) 82 | dist_mask = get_dist(dx, dy, scale, self.step_size) 83 | 84 | state = [int(x) for x in state] 85 | 86 | dist = np.pad(self.fmm_dist, self.du, 87 | 'constant', constant_values=self.fmm_dist.shape[0] ** 2) 88 | subset = dist[state[0]:state[0] + 2 * self.du + 1, 89 | state[1]:state[1] + 2 * self.du + 1] 90 | 91 | assert subset.shape[0] == 2 * self.du + 1 and \ 92 | subset.shape[1] == 2 * self.du + 1, \ 93 | "Planning error: unexpected subset shape {}".format(subset.shape) 94 | 95 | subset *= mask 96 | subset += (1 - mask) * self.fmm_dist.shape[0] ** 2 97 | 98 | if subset[self.du, self.du] < 0.25 * 100 / 5.: # 25cm 99 | stop = True 100 | else: 101 | stop = False 102 | 103 | subset -= subset[self.du, self.du] 104 | ratio1 = subset / dist_mask 105 | subset[ratio1 < -1.5] = 1 106 | 107 | (stg_x, stg_y) = np.unravel_index(np.argmin(subset), subset.shape) 108 | 109 | if subset[stg_x, stg_y] > -0.0001: 110 | replan = True 111 | else: 112 | replan = False 113 | 114 | return (stg_x + state[0] - self.du) * scale, \ 115 | (stg_y + state[1] - self.du) * scale, replan, stop 116 | 117 | def _find_nearest_goal(self, goal): 118 | traversible = skimage.morphology.binary_dilation( 119 | np.zeros(self.traversible.shape), 120 | skimage.morphology.disk(2)) != True 121 | traversible = traversible * 1. 
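        # Note: dilating an all-zeros map leaves it all zeros, so `traversible`
        # here is an entirely free grid. Planning on it from the goal yields
        # near-Euclidean distances; masking by the real traversibility below
        # then snaps the goal to the nearest actually traversible cell.
        #
        # Illustrative end-to-end use of FMMPlanner (a sketch with
        # hypothetical map shape and poses, not code from this repo):
        #
        #     free = np.ones((480, 480))            # 1 = traversible
        #     planner = FMMPlanner(free, step_size=5)
        #     planner.set_goal([400, 400])
        #     stg_x, stg_y, replan, stop = \
        #         planner.get_short_term_goal([240., 240.])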
122 | planner = FMMPlanner(traversible) 123 | planner.set_goal(goal) 124 | 125 | mask = self.traversible 126 | 127 | dist_map = planner.fmm_dist * mask 128 | dist_map[dist_map == 0] = dist_map.max() 129 | 130 | goal = np.unravel_index(dist_map.argmin(), dist_map.shape) 131 | 132 | return goal 133 | -------------------------------------------------------------------------------- /envs/utils/map_builder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import envs.utils.depth_utils as du 3 | 4 | 5 | class MapBuilder(object): 6 | def __init__(self, params): 7 | self.params = params 8 | frame_width = params['frame_width'] 9 | frame_height = params['frame_height'] 10 | fov = params['fov'] 11 | self.camera_matrix = du.get_camera_matrix( 12 | frame_width, 13 | frame_height, 14 | fov) 15 | self.vision_range = params['vision_range'] 16 | 17 | self.map_size_cm = params['map_size_cm'] 18 | self.resolution = params['resolution'] 19 | agent_min_z = params['agent_min_z'] 20 | agent_max_z = params['agent_max_z'] 21 | self.z_bins = [agent_min_z, agent_max_z] 22 | self.du_scale = params['du_scale'] 23 | self.visualize = params['visualize'] 24 | self.obs_threshold = params['obs_threshold'] 25 | 26 | self.map = np.zeros((self.map_size_cm // self.resolution, 27 | self.map_size_cm // self.resolution, 28 | len(self.z_bins) + 1), dtype=np.float32) 29 | 30 | self.agent_height = params['agent_height'] 31 | self.agent_view_angle = params['agent_view_angle'] 32 | return 33 | 34 | def update_map(self, depth, current_pose): 35 | with np.errstate(invalid="ignore"): 36 | depth[depth > self.vision_range * self.resolution] = np.NaN 37 | point_cloud = du.get_point_cloud_from_z(depth, self.camera_matrix, 38 | scale=self.du_scale) 39 | 40 | agent_view = du.transform_camera_view(point_cloud, 41 | self.agent_height, 42 | self.agent_view_angle) 43 | 44 | shift_loc = [self.vision_range * self.resolution // 2, 0, np.pi / 2.0] 45 | agent_view_centered = du.transform_pose(agent_view, shift_loc) 46 | 47 | agent_view_flat = du.bin_points( 48 | agent_view_centered, 49 | self.vision_range, 50 | self.z_bins, 51 | self.resolution) 52 | 53 | agent_view_cropped = agent_view_flat[:, :, 1] 54 | agent_view_cropped = agent_view_cropped / self.obs_threshold 55 | agent_view_cropped[agent_view_cropped >= 0.5] = 1.0 56 | agent_view_cropped[agent_view_cropped < 0.5] = 0.0 57 | 58 | agent_view_explored = agent_view_flat.sum(2) 59 | agent_view_explored[agent_view_explored > 0] = 1.0 60 | 61 | geocentric_pc = du.transform_pose(agent_view, current_pose) 62 | 63 | geocentric_flat = du.bin_points( 64 | geocentric_pc, 65 | self.map.shape[0], 66 | self.z_bins, 67 | self.resolution) 68 | 69 | self.map = self.map + geocentric_flat 70 | 71 | map_gt = self.map[:, :, 1] / self.obs_threshold 72 | map_gt[map_gt >= 0.5] = 1.0 73 | map_gt[map_gt < 0.5] = 0.0 74 | 75 | explored_gt = self.map.sum(2) 76 | explored_gt[explored_gt > 1] = 1.0 77 | 78 | return agent_view_cropped, map_gt, agent_view_explored, explored_gt 79 | 80 | def get_st_pose(self, current_loc): 81 | loc = [- (current_loc[0] / self.resolution 82 | - self.map_size_cm // (self.resolution * 2)) / 83 | (self.map_size_cm // (self.resolution * 2)), 84 | - (current_loc[1] / self.resolution 85 | - self.map_size_cm // (self.resolution * 2)) / 86 | (self.map_size_cm // (self.resolution * 2)), 87 | 90 - np.rad2deg(current_loc[2])] 88 | return loc 89 | 90 | def reset_map(self, map_size): 91 | self.map_size_cm = map_size 92 | 93 | self.map = 
np.zeros((self.map_size_cm // self.resolution, 94 | self.map_size_cm // self.resolution, 95 | len(self.z_bins) + 1), dtype=np.float32) 96 | 97 | def get_map(self): 98 | return self.map 99 | -------------------------------------------------------------------------------- /envs/utils/pose.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_l2_distance(x1, x2, y1, y2): 5 | """ 6 | Computes the L2 distance between two points. 7 | """ 8 | return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 9 | 10 | 11 | def get_rel_pose_change(pos2, pos1): 12 | x1, y1, o1 = pos1 13 | x2, y2, o2 = pos2 14 | 15 | theta = np.arctan2(y2 - y1, x2 - x1) - o1 16 | dist = get_l2_distance(x1, x2, y1, y2) 17 | dx = dist * np.cos(theta) 18 | dy = dist * np.sin(theta) 19 | do = o2 - o1 20 | 21 | return dx, dy, do 22 | 23 | 24 | def get_new_pose(pose, rel_pose_change): 25 | x, y, o = pose 26 | dx, dy, do = rel_pose_change 27 | 28 | global_dx = dx * np.sin(np.deg2rad(o)) + dy * np.cos(np.deg2rad(o)) 29 | global_dy = dx * np.cos(np.deg2rad(o)) - dy * np.sin(np.deg2rad(o)) 30 | x += global_dy 31 | y += global_dx 32 | o += np.rad2deg(do) 33 | if o > 180.: 34 | o -= 360. 35 | 36 | return x, y, o 37 | 38 | 39 | def threshold_poses(coords, shape): 40 | coords[0] = min(max(0, coords[0]), shape[0] - 1) 41 | coords[1] = min(max(0, coords[1]), shape[1] - 1) 42 | return coords 43 | -------------------------------------------------------------------------------- /envs/utils/rotation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utilities for generating and applying rotation matrices. 
17 | """ 18 | import numpy as np 19 | 20 | ANGLE_EPS = 0.001 21 | 22 | 23 | def normalize(v): 24 | return v / np.linalg.norm(v) 25 | 26 | 27 | def get_r_matrix(ax_, angle): 28 | ax = normalize(ax_) 29 | if np.abs(angle) > ANGLE_EPS: 30 | S_hat = np.array( 31 | [[0.0, -ax[2], ax[1]], [ax[2], 0.0, -ax[0]], [-ax[1], ax[0], 0.0]], 32 | dtype=np.float32) 33 | R = np.eye(3) + np.sin(angle) * S_hat + \ 34 | (1 - np.cos(angle)) * (np.linalg.matrix_power(S_hat, 2)) 35 | else: 36 | R = np.eye(3) 37 | return R 38 | 39 | 40 | def r_between(v_from_, v_to_): 41 | v_from = normalize(v_from_) 42 | v_to = normalize(v_to_) 43 | ax = normalize(np.cross(v_from, v_to)) 44 | angle = np.arccos(np.dot(v_from, v_to)) 45 | return get_r_matrix(ax, angle) 46 | 47 | 48 | def rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to): 49 | inputs = [up_from, lookat_from, up_to, lookat_to] 50 | for i in range(4): 51 | inputs[i] = normalize(np.array(inputs[i]).reshape((-1,))) 52 | up_from, lookat_from, up_to, lookat_to = inputs 53 | r1 = r_between(lookat_from, lookat_to) 54 | 55 | new_x = np.dot(r1, np.array([1, 0, 0]).reshape((-1, 1))).reshape((-1)) 56 | to_x = normalize(np.cross(lookat_to, up_to)) 57 | angle = np.arccos(np.dot(new_x, to_x)) 58 | if angle > ANGLE_EPS: 59 | if angle < np.pi - ANGLE_EPS: 60 | ax = normalize(np.cross(new_x, to_x)) 61 | flip = np.dot(lookat_to, ax) 62 | if flip > 0: 63 | r2 = get_r_matrix(lookat_to, angle) 64 | elif flip < 0: 65 | r2 = get_r_matrix(lookat_to, -1. * angle) 66 | else: 67 | # Angle of rotation is too close to 180 degrees, direction of 68 | # rotation does not matter. 69 | r2 = get_r_matrix(lookat_to, angle) 70 | else: 71 | r2 = np.eye(3) 72 | return np.dot(r2, r1) 73 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from collections import deque, defaultdict 2 | import os 3 | import logging 4 | import time 5 | import json 6 | import gym 7 | import torch.nn as nn 8 | import torch 9 | import numpy as np 10 | 11 | from model import RL_Policy, Semantic_Mapping 12 | from utils.storage import GlobalRolloutStorage 13 | from envs import make_vec_envs 14 | from arguments import get_args 15 | import algo 16 | 17 | os.environ["OMP_NUM_THREADS"] = "1" 18 | 19 | 20 | def main(): 21 | args = get_args() 22 | 23 | np.random.seed(args.seed) 24 | torch.manual_seed(args.seed) 25 | 26 | if args.cuda: 27 | torch.cuda.manual_seed(args.seed) 28 | 29 | # Setup Logging 30 | log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name) 31 | dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name) 32 | 33 | if not os.path.exists(log_dir): 34 | os.makedirs(log_dir) 35 | if not os.path.exists(dump_dir): 36 | os.makedirs(dump_dir) 37 | 38 | logging.basicConfig( 39 | filename=log_dir + 'train.log', 40 | level=logging.INFO) 41 | print("Dumping at {}".format(log_dir)) 42 | print(args) 43 | logging.info(args) 44 | 45 | # Logging and loss variables 46 | num_scenes = args.num_processes 47 | num_episodes = int(args.num_eval_episodes) 48 | device = args.device = torch.device("cuda:0" if args.cuda else "cpu") 49 | 50 | g_masks = torch.ones(num_scenes).float().to(device) 51 | 52 | best_g_reward = -np.inf 53 | 54 | if args.eval: 55 | episode_success = [] 56 | episode_spl = [] 57 | episode_dist = [] 58 | for _ in range(args.num_processes): 59 | episode_success.append(deque(maxlen=num_episodes)) 60 | episode_spl.append(deque(maxlen=num_episodes)) 61 | 
episode_dist.append(deque(maxlen=num_episodes))
62 | 
63 |     else:
64 |         episode_success = deque(maxlen=1000)
65 |         episode_spl = deque(maxlen=1000)
66 |         episode_dist = deque(maxlen=1000)
67 | 
68 |     finished = np.zeros((args.num_processes))
69 |     wait_env = np.zeros((args.num_processes))
70 | 
71 |     g_episode_rewards = deque(maxlen=1000)
72 | 
73 |     g_value_losses = deque(maxlen=1000)
74 |     g_action_losses = deque(maxlen=1000)
75 |     g_dist_entropies = deque(maxlen=1000)
76 | 
77 |     per_step_g_rewards = deque(maxlen=1000)
78 | 
79 |     g_process_rewards = np.zeros((num_scenes))
80 | 
81 |     # Starting environments
82 |     torch.set_num_threads(1)
83 |     envs = make_vec_envs(args)
84 |     obs, infos = envs.reset()
85 | 
86 |     torch.set_grad_enabled(False)
87 | 
88 |     # Initialize map variables:
89 |     # Full map consists of multiple channels containing the following:
90 |     # 1. Obstacle Map
91 |     # 2. Explored Area
92 |     # 3. Current Agent Location
93 |     # 4. Past Agent Locations
94 |     # 5,6,7,.. : Semantic Categories
95 |     nc = args.num_sem_categories + 4  # num channels
96 | 
97 |     # Calculating full and local map sizes
98 |     map_size = args.map_size_cm // args.map_resolution
99 |     full_w, full_h = map_size, map_size
100 |     local_w = int(full_w / args.global_downscaling)
101 |     local_h = int(full_h / args.global_downscaling)
102 | 
103 |     # Initializing full and local map
104 |     full_map = torch.zeros(num_scenes, nc, full_w, full_h).float().to(device)
105 |     local_map = torch.zeros(num_scenes, nc, local_w,
106 |                             local_h).float().to(device)
107 | 
108 |     # Initial full and local pose
109 |     full_pose = torch.zeros(num_scenes, 3).float().to(device)
110 |     local_pose = torch.zeros(num_scenes, 3).float().to(device)
111 | 
112 |     # Origin of local map
113 |     origins = np.zeros((num_scenes, 3))
114 | 
115 |     # Local Map Boundaries
116 |     lmb = np.zeros((num_scenes, 4)).astype(int)
117 | 
118 |     # Planner pose inputs have 7 dimensions
119 |     # 1-3 store continuous global agent location
120 |     # 4-7 store local map boundaries
121 |     planner_pose_inputs = np.zeros((num_scenes, 7))
122 | 
123 |     def get_local_map_boundaries(agent_loc, local_sizes, full_sizes):
124 |         loc_r, loc_c = agent_loc
125 |         local_w, local_h = local_sizes
126 |         full_w, full_h = full_sizes
127 | 
128 |         if args.global_downscaling > 1:
129 |             gx1, gy1 = loc_r - local_w // 2, loc_c - local_h // 2
130 |             gx2, gy2 = gx1 + local_w, gy1 + local_h
131 |             if gx1 < 0:
132 |                 gx1, gx2 = 0, local_w
133 |             if gx2 > full_w:
134 |                 gx1, gx2 = full_w - local_w, full_w
135 | 
136 |             if gy1 < 0:
137 |                 gy1, gy2 = 0, local_h
138 |             if gy2 > full_h:
139 |                 gy1, gy2 = full_h - local_h, full_h
140 |         else:
141 |             gx1, gx2, gy1, gy2 = 0, full_w, 0, full_h
142 | 
143 |         return [gx1, gx2, gy1, gy2]
144 | 
145 |     def init_map_and_pose():
146 |         full_map.fill_(0.)
147 |         full_pose.fill_(0.)
148 |         full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0
149 | 
150 |         locs = full_pose.cpu().numpy()
151 |         planner_pose_inputs[:, :3] = locs
152 |         for e in range(num_scenes):
153 |             r, c = locs[e, 1], locs[e, 0]
154 |             loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
155 |                             int(c * 100.0 / args.map_resolution)]
156 | 
157 |             full_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0
158 | 
159 |             lmb[e] = get_local_map_boundaries((loc_r, loc_c),
160 |                                               (local_w, local_h),
161 |                                               (full_w, full_h))
162 | 
163 |             planner_pose_inputs[e, 3:] = lmb[e]
164 |             origins[e] = [lmb[e][2] * args.map_resolution / 100.0,
165 |                           lmb[e][0] * args.map_resolution / 100.0, 0.]
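        # Worked example (a sketch assuming the repo's default arguments
        # map_size_cm=2400, map_resolution=5, global_downscaling=2, i.e. a
        # 480x480 full map and a 240x240 local map): the agent starts at the
        # map center, so loc_r = loc_c = int(12.0 * 100 / 5) = 240,
        # get_local_map_boundaries returns lmb = [120, 360, 120, 360], and
        # origins[e] = [6.0, 6.0, 0.]. The local map thus covers the central
        # 12m x 12m of the 24m x 24m full map, and local_pose below becomes
        # (6.0, 6.0, 0), the center of the local window.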
166 | 167 | for e in range(num_scenes): 168 | local_map[e] = full_map[e, :, 169 | lmb[e, 0]:lmb[e, 1], 170 | lmb[e, 2]:lmb[e, 3]] 171 | local_pose[e] = full_pose[e] - \ 172 | torch.from_numpy(origins[e]).to(device).float() 173 | 174 | def init_map_and_pose_for_env(e): 175 | full_map[e].fill_(0.) 176 | full_pose[e].fill_(0.) 177 | full_pose[e, :2] = args.map_size_cm / 100.0 / 2.0 178 | 179 | locs = full_pose[e].cpu().numpy() 180 | planner_pose_inputs[e, :3] = locs 181 | r, c = locs[1], locs[0] 182 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution), 183 | int(c * 100.0 / args.map_resolution)] 184 | 185 | full_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0 186 | 187 | lmb[e] = get_local_map_boundaries((loc_r, loc_c), 188 | (local_w, local_h), 189 | (full_w, full_h)) 190 | 191 | planner_pose_inputs[e, 3:] = lmb[e] 192 | origins[e] = [lmb[e][2] * args.map_resolution / 100.0, 193 | lmb[e][0] * args.map_resolution / 100.0, 0.] 194 | 195 | local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] 196 | local_pose[e] = full_pose[e] - \ 197 | torch.from_numpy(origins[e]).to(device).float() 198 | 199 | def update_intrinsic_rew(e): 200 | prev_explored_area = full_map[e, 1].sum(1).sum(0) 201 | full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \ 202 | local_map[e] 203 | curr_explored_area = full_map[e, 1].sum(1).sum(0) 204 | intrinsic_rews[e] = curr_explored_area - prev_explored_area 205 | intrinsic_rews[e] *= (args.map_resolution / 100.)**2 # to m^2 206 | 207 | init_map_and_pose() 208 | 209 | # Global policy observation space 210 | ngc = 8 + args.num_sem_categories 211 | es = 2 212 | g_observation_space = gym.spaces.Box(0, 1, 213 | (ngc, 214 | local_w, 215 | local_h), dtype='uint8') 216 | 217 | # Global policy action space 218 | g_action_space = gym.spaces.Box(low=0.0, high=0.99, 219 | shape=(2,), dtype=np.float32) 220 | 221 | # Global policy recurrent layer size 222 | g_hidden_size = args.global_hidden_size 223 | 224 | # Semantic Mapping 225 | sem_map_module = Semantic_Mapping(args).to(device) 226 | sem_map_module.eval() 227 | 228 | # Global policy 229 | g_policy = RL_Policy(g_observation_space.shape, g_action_space, 230 | model_type=1, 231 | base_kwargs={'recurrent': args.use_recurrent_global, 232 | 'hidden_size': g_hidden_size, 233 | 'num_sem_categories': ngc - 8 234 | }).to(device) 235 | g_agent = algo.PPO(g_policy, args.clip_param, args.ppo_epoch, 236 | args.num_mini_batch, args.value_loss_coef, 237 | args.entropy_coef, lr=args.lr, eps=args.eps, 238 | max_grad_norm=args.max_grad_norm) 239 | 240 | global_input = torch.zeros(num_scenes, ngc, local_w, local_h) 241 | global_orientation = torch.zeros(num_scenes, 1).long() 242 | intrinsic_rews = torch.zeros(num_scenes).to(device) 243 | extras = torch.zeros(num_scenes, 2) 244 | 245 | # Storage 246 | g_rollouts = GlobalRolloutStorage(args.num_global_steps, 247 | num_scenes, g_observation_space.shape, 248 | g_action_space, g_policy.rec_state_size, 249 | es).to(device) 250 | 251 | if args.load != "0": 252 | print("Loading model {}".format(args.load)) 253 | state_dict = torch.load(args.load, 254 | map_location=lambda storage, loc: storage) 255 | g_policy.load_state_dict(state_dict) 256 | 257 | if args.eval: 258 | g_policy.eval() 259 | 260 | # Predict semantic map from frame 1 261 | poses = torch.from_numpy(np.asarray( 262 | [infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes)]) 263 | ).float().to(device) 264 | 265 | _, local_map, _, local_pose = \ 266 | sem_map_module(obs, poses, local_map, local_pose) 267 | 
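    # Channel layout of the global policy input assembled below (a summary
    # of the assignments that follow, using the local/full map convention
    # from the comments above):
    #   0-3 : local obstacle map, explored area, current and past agent
    #         locations (local_map channels 0-3)
    #   4-7 : the same four channels of the full map, max-pooled by
    #         args.global_downscaling down to local map size
    #   8-  : one channel per semantic category (local_map channels 4 on)
    # extras carries the discretized orientation and the goal category id.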
268 | # Compute Global policy input 269 | locs = local_pose.cpu().numpy() 270 | global_input = torch.zeros(num_scenes, ngc, local_w, local_h) 271 | global_orientation = torch.zeros(num_scenes, 1).long() 272 | 273 | for e in range(num_scenes): 274 | r, c = locs[e, 1], locs[e, 0] 275 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution), 276 | int(c * 100.0 / args.map_resolution)] 277 | 278 | local_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1. 279 | global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) 280 | 281 | global_input[:, 0:4, :, :] = local_map[:, 0:4, :, :].detach() 282 | global_input[:, 4:8, :, :] = nn.MaxPool2d(args.global_downscaling)( 283 | full_map[:, 0:4, :, :]) 284 | global_input[:, 8:, :, :] = local_map[:, 4:, :, :].detach() 285 | goal_cat_id = torch.from_numpy(np.asarray( 286 | [infos[env_idx]['goal_cat_id'] for env_idx 287 | in range(num_scenes)])) 288 | 289 | extras = torch.zeros(num_scenes, 2) 290 | extras[:, 0] = global_orientation[:, 0] 291 | extras[:, 1] = goal_cat_id 292 | 293 | g_rollouts.obs[0].copy_(global_input) 294 | g_rollouts.extras[0].copy_(extras) 295 | 296 | # Run Global Policy (global_goals = Long-Term Goal) 297 | g_value, g_action, g_action_log_prob, g_rec_states = \ 298 | g_policy.act( 299 | g_rollouts.obs[0], 300 | g_rollouts.rec_states[0], 301 | g_rollouts.masks[0], 302 | extras=g_rollouts.extras[0], 303 | deterministic=False 304 | ) 305 | 306 | cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() 307 | global_goals = [[int(action[0] * local_w), int(action[1] * local_h)] 308 | for action in cpu_actions] 309 | global_goals = [[min(x, int(local_w - 1)), min(y, int(local_h - 1))] 310 | for x, y in global_goals] 311 | 312 | goal_maps = [np.zeros((local_w, local_h)) for _ in range(num_scenes)] 313 | 314 | for e in range(num_scenes): 315 | goal_maps[e][global_goals[e][0], global_goals[e][1]] = 1 316 | 317 | planner_inputs = [{} for e in range(num_scenes)] 318 | for e, p_input in enumerate(planner_inputs): 319 | p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy() 320 | p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy() 321 | p_input['pose_pred'] = planner_pose_inputs[e] 322 | p_input['goal'] = goal_maps[e] # global_goals[e] 323 | p_input['new_goal'] = 1 324 | p_input['found_goal'] = 0 325 | p_input['wait'] = wait_env[e] or finished[e] 326 | if args.visualize or args.print_images: 327 | local_map[e, -1, :, :] = 1e-5 328 | p_input['sem_map_pred'] = local_map[e, 4:, :, : 329 | ].argmax(0).cpu().numpy() 330 | 331 | obs, _, done, infos = envs.plan_act_and_preprocess(planner_inputs) 332 | 333 | start = time.time() 334 | g_reward = 0 335 | 336 | torch.set_grad_enabled(False) 337 | spl_per_category = defaultdict(list) 338 | success_per_category = defaultdict(list) 339 | 340 | for step in range(args.num_training_frames // args.num_processes + 1): 341 | if finished.sum() == args.num_processes: 342 | break 343 | 344 | g_step = (step // args.num_local_steps) % args.num_global_steps 345 | l_step = step % args.num_local_steps 346 | 347 | # ------------------------------------------------------------------ 348 | # Reinitialize variables when episode ends 349 | l_masks = torch.FloatTensor([0 if x else 1 350 | for x in done]).to(device) 351 | g_masks *= l_masks 352 | 353 | for e, x in enumerate(done): 354 | if x: 355 | spl = infos[e]['spl'] 356 | success = infos[e]['success'] 357 | dist = infos[e]['distance_to_goal'] 358 | spl_per_category[infos[e]['goal_name']].append(spl) 359 | success_per_category[infos[e]['goal_name']].append(success) 360 | if 
args.eval: 361 | episode_success[e].append(success) 362 | episode_spl[e].append(spl) 363 | episode_dist[e].append(dist) 364 | if len(episode_success[e]) == num_episodes: 365 | finished[e] = 1 366 | else: 367 | episode_success.append(success) 368 | episode_spl.append(spl) 369 | episode_dist.append(dist) 370 | wait_env[e] = 1. 371 | update_intrinsic_rew(e) 372 | init_map_and_pose_for_env(e) 373 | # ------------------------------------------------------------------ 374 | 375 | # ------------------------------------------------------------------ 376 | # Semantic Mapping Module 377 | poses = torch.from_numpy(np.asarray( 378 | [infos[env_idx]['sensor_pose'] for env_idx 379 | in range(num_scenes)]) 380 | ).float().to(device) 381 | 382 | _, local_map, _, local_pose = \ 383 | sem_map_module(obs, poses, local_map, local_pose) 384 | 385 | locs = local_pose.cpu().numpy() 386 | planner_pose_inputs[:, :3] = locs + origins 387 | local_map[:, 2, :, :].fill_(0.) # Resetting current location channel 388 | for e in range(num_scenes): 389 | r, c = locs[e, 1], locs[e, 0] 390 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution), 391 | int(c * 100.0 / args.map_resolution)] 392 | local_map[e, 2:4, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1. 393 | 394 | # ------------------------------------------------------------------ 395 | 396 | # ------------------------------------------------------------------ 397 | # Global Policy 398 | if l_step == args.num_local_steps - 1: 399 | # For every global step, update the full and local maps 400 | for e in range(num_scenes): 401 | if wait_env[e] == 1: # New episode 402 | wait_env[e] = 0. 403 | else: 404 | update_intrinsic_rew(e) 405 | 406 | full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \ 407 | local_map[e] 408 | full_pose[e] = local_pose[e] + \ 409 | torch.from_numpy(origins[e]).to(device).float() 410 | 411 | locs = full_pose[e].cpu().numpy() 412 | r, c = locs[1], locs[0] 413 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution), 414 | int(c * 100.0 / args.map_resolution)] 415 | 416 | lmb[e] = get_local_map_boundaries((loc_r, loc_c), 417 | (local_w, local_h), 418 | (full_w, full_h)) 419 | 420 | planner_pose_inputs[e, 3:] = lmb[e] 421 | origins[e] = [lmb[e][2] * args.map_resolution / 100.0, 422 | lmb[e][0] * args.map_resolution / 100.0, 0.] 423 | 424 | local_map[e] = full_map[e, :, 425 | lmb[e, 0]:lmb[e, 1], 426 | lmb[e, 2]:lmb[e, 3]] 427 | local_pose[e] = full_pose[e] - \ 428 | torch.from_numpy(origins[e]).to(device).float() 429 | 430 | locs = local_pose.cpu().numpy() 431 | for e in range(num_scenes): 432 | global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) 
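                # The heading is discretized into 5-degree bins over
                # [-180, 180), i.e. 72 classes, matching the policy's
                # nn.Embedding(72, 8) orientation embedding. For a
                # hypothetical heading locs[e, 2] = -37.2 degrees:
                # int((-37.2 + 180.0) / 5.) = 28.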
433 | global_input[:, 0:4, :, :] = local_map[:, 0:4, :, :] 434 | global_input[:, 4:8, :, :] = \ 435 | nn.MaxPool2d(args.global_downscaling)( 436 | full_map[:, 0:4, :, :]) 437 | global_input[:, 8:, :, :] = local_map[:, 4:, :, :].detach() 438 | goal_cat_id = torch.from_numpy(np.asarray( 439 | [infos[env_idx]['goal_cat_id'] for env_idx 440 | in range(num_scenes)])) 441 | extras[:, 0] = global_orientation[:, 0] 442 | extras[:, 1] = goal_cat_id 443 | 444 | # Get exploration reward and metrics 445 | g_reward = torch.from_numpy(np.asarray( 446 | [infos[env_idx]['g_reward'] for env_idx in range(num_scenes)]) 447 | ).float().to(device) 448 | g_reward += args.intrinsic_rew_coeff * intrinsic_rews.detach() 449 | 450 | g_process_rewards += g_reward.cpu().numpy() 451 | g_total_rewards = g_process_rewards * \ 452 | (1 - g_masks.cpu().numpy()) 453 | g_process_rewards *= g_masks.cpu().numpy() 454 | per_step_g_rewards.append(np.mean(g_reward.cpu().numpy())) 455 | 456 | if np.sum(g_total_rewards) != 0: 457 | for total_rew in g_total_rewards: 458 | if total_rew != 0: 459 | g_episode_rewards.append(total_rew) 460 | 461 | # Add samples to global policy storage 462 | if step == 0: 463 | g_rollouts.obs[0].copy_(global_input) 464 | g_rollouts.extras[0].copy_(extras) 465 | else: 466 | g_rollouts.insert( 467 | global_input, g_rec_states, 468 | g_action, g_action_log_prob, g_value, 469 | g_reward, g_masks, extras 470 | ) 471 | 472 | # Sample long-term goal from global policy 473 | g_value, g_action, g_action_log_prob, g_rec_states = \ 474 | g_policy.act( 475 | g_rollouts.obs[g_step + 1], 476 | g_rollouts.rec_states[g_step + 1], 477 | g_rollouts.masks[g_step + 1], 478 | extras=g_rollouts.extras[g_step + 1], 479 | deterministic=False 480 | ) 481 | cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() 482 | global_goals = [[int(action[0] * local_w), 483 | int(action[1] * local_h)] 484 | for action in cpu_actions] 485 | global_goals = [[min(x, int(local_w - 1)), 486 | min(y, int(local_h - 1))] 487 | for x, y in global_goals] 488 | 489 | g_reward = 0 490 | g_masks = torch.ones(num_scenes).float().to(device) 491 | 492 | # ------------------------------------------------------------------ 493 | 494 | # ------------------------------------------------------------------ 495 | # Update long-term goal if target object is found 496 | found_goal = [0 for _ in range(num_scenes)] 497 | goal_maps = [np.zeros((local_w, local_h)) for _ in range(num_scenes)] 498 | 499 | for e in range(num_scenes): 500 | goal_maps[e][global_goals[e][0], global_goals[e][1]] = 1 501 | 502 | for e in range(num_scenes): 503 | cn = infos[e]['goal_cat_id'] + 4 504 | if local_map[e, cn, :, :].sum() != 0.: 505 | cat_semantic_map = local_map[e, cn, :, :].cpu().numpy() 506 | cat_semantic_scores = cat_semantic_map 507 | cat_semantic_scores[cat_semantic_scores > 0] = 1. 
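                # Once the goal category channel (cn = goal_cat_id + 4)
                # contains any mass, the thresholded category mask replaces
                # the single-cell long-term goal and found_goal is set, so
                # the local planner heads for the detected object instead of
                # the exploration goal sampled by the global policy.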
508 | goal_maps[e] = cat_semantic_scores 509 | found_goal[e] = 1 510 | # ------------------------------------------------------------------ 511 | 512 | # ------------------------------------------------------------------ 513 | # Take action and get next observation 514 | planner_inputs = [{} for e in range(num_scenes)] 515 | for e, p_input in enumerate(planner_inputs): 516 | p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy() 517 | p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy() 518 | p_input['pose_pred'] = planner_pose_inputs[e] 519 | p_input['goal'] = goal_maps[e] # global_goals[e] 520 | p_input['new_goal'] = l_step == args.num_local_steps - 1 521 | p_input['found_goal'] = found_goal[e] 522 | p_input['wait'] = wait_env[e] or finished[e] 523 | if args.visualize or args.print_images: 524 | local_map[e, -1, :, :] = 1e-5 525 | p_input['sem_map_pred'] = local_map[e, 4:, :, 526 | :].argmax(0).cpu().numpy() 527 | 528 | obs, _, done, infos = envs.plan_act_and_preprocess(planner_inputs) 529 | # ------------------------------------------------------------------ 530 | 531 | # ------------------------------------------------------------------ 532 | # Training 533 | torch.set_grad_enabled(True) 534 | if g_step % args.num_global_steps == args.num_global_steps - 1 \ 535 | and l_step == args.num_local_steps - 1: 536 | if not args.eval: 537 | g_next_value = g_policy.get_value( 538 | g_rollouts.obs[-1], 539 | g_rollouts.rec_states[-1], 540 | g_rollouts.masks[-1], 541 | extras=g_rollouts.extras[-1] 542 | ).detach() 543 | 544 | g_rollouts.compute_returns(g_next_value, args.use_gae, 545 | args.gamma, args.tau) 546 | g_value_loss, g_action_loss, g_dist_entropy = \ 547 | g_agent.update(g_rollouts) 548 | g_value_losses.append(g_value_loss) 549 | g_action_losses.append(g_action_loss) 550 | g_dist_entropies.append(g_dist_entropy) 551 | g_rollouts.after_update() 552 | 553 | torch.set_grad_enabled(False) 554 | # ------------------------------------------------------------------ 555 | 556 | # ------------------------------------------------------------------ 557 | # Logging 558 | if step % args.log_interval == 0: 559 | end = time.time() 560 | time_elapsed = time.gmtime(end - start) 561 | log = " ".join([ 562 | "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1), 563 | "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)), 564 | "num timesteps {},".format(step * num_scenes), 565 | "FPS {},".format(int(step * num_scenes / (end - start))) 566 | ]) 567 | 568 | log += "\n\tRewards:" 569 | 570 | if len(g_episode_rewards) > 0: 571 | log += " ".join([ 572 | " Global step mean/med rew:", 573 | "{:.4f}/{:.4f},".format( 574 | np.mean(per_step_g_rewards), 575 | np.median(per_step_g_rewards)), 576 | " Global eps mean/med/min/max eps rew:", 577 | "{:.3f}/{:.3f}/{:.3f}/{:.3f},".format( 578 | np.mean(g_episode_rewards), 579 | np.median(g_episode_rewards), 580 | np.min(g_episode_rewards), 581 | np.max(g_episode_rewards)) 582 | ]) 583 | 584 | if args.eval: 585 | total_success = [] 586 | total_spl = [] 587 | total_dist = [] 588 | for e in range(args.num_processes): 589 | for acc in episode_success[e]: 590 | total_success.append(acc) 591 | for dist in episode_dist[e]: 592 | total_dist.append(dist) 593 | for spl in episode_spl[e]: 594 | total_spl.append(spl) 595 | 596 | if len(total_spl) > 0: 597 | log += " ObjectNav succ/spl/dtg:" 598 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( 599 | np.mean(total_success), 600 | np.mean(total_spl), 601 | np.mean(total_dist), 602 | len(total_spl)) 603 | else: 604 | if 
len(episode_success) > 100: 605 | log += " ObjectNav succ/spl/dtg:" 606 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( 607 | np.mean(episode_success), 608 | np.mean(episode_spl), 609 | np.mean(episode_dist), 610 | len(episode_spl)) 611 | 612 | log += "\n\tLosses:" 613 | if len(g_value_losses) > 0 and not args.eval: 614 | log += " ".join([ 615 | " Policy Loss value/action/dist:", 616 | "{:.3f}/{:.3f}/{:.3f},".format( 617 | np.mean(g_value_losses), 618 | np.mean(g_action_losses), 619 | np.mean(g_dist_entropies)) 620 | ]) 621 | 622 | print(log) 623 | logging.info(log) 624 | # ------------------------------------------------------------------ 625 | 626 | # ------------------------------------------------------------------ 627 | # Save best models 628 | if (step * num_scenes) % args.save_interval < \ 629 | num_scenes: 630 | if len(g_episode_rewards) >= 1000 and \ 631 | (np.mean(g_episode_rewards) >= best_g_reward) \ 632 | and not args.eval: 633 | torch.save(g_policy.state_dict(), 634 | os.path.join(log_dir, "model_best.pth")) 635 | best_g_reward = np.mean(g_episode_rewards) 636 | 637 | # Save periodic models 638 | if (step * num_scenes) % args.save_periodic < \ 639 | num_scenes: 640 | total_steps = step * num_scenes 641 | if not args.eval: 642 | torch.save(g_policy.state_dict(), 643 | os.path.join(dump_dir, 644 | "periodic_{}.pth".format(total_steps))) 645 | # ------------------------------------------------------------------ 646 | 647 | # Print and save model performance numbers during evaluation 648 | if args.eval: 649 | print("Dumping eval details...") 650 | 651 | total_success = [] 652 | total_spl = [] 653 | total_dist = [] 654 | for e in range(args.num_processes): 655 | for acc in episode_success[e]: 656 | total_success.append(acc) 657 | for dist in episode_dist[e]: 658 | total_dist.append(dist) 659 | for spl in episode_spl[e]: 660 | total_spl.append(spl) 661 | 662 | if len(total_spl) > 0: 663 | log = "Final ObjectNav succ/spl/dtg:" 664 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( 665 | np.mean(total_success), 666 | np.mean(total_spl), 667 | np.mean(total_dist), 668 | len(total_spl)) 669 | 670 | print(log) 671 | logging.info(log) 672 | 673 | # Save the spl per category 674 | log = "Success | SPL per category\n" 675 | for key in success_per_category: 676 | log += "{}: {} | {}\n".format(key, 677 | sum(success_per_category[key]) / 678 | len(success_per_category[key]), 679 | sum(spl_per_category[key]) / 680 | len(spl_per_category[key])) 681 | 682 | print(log) 683 | logging.info(log) 684 | 685 | with open('{}/{}_spl_per_cat_pred_thr.json'.format( 686 | dump_dir, args.split), 'w') as f: 687 | json.dump(spl_per_category, f) 688 | 689 | with open('{}/{}_success_per_cat_pred_thr.json'.format( 690 | dump_dir, args.split), 'w') as f: 691 | json.dump(success_per_category, f) 692 | 693 | 694 | if __name__ == "__main__": 695 | main() 696 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import functional as F 4 | import numpy as np 5 | 6 | from utils.distributions import Categorical, DiagGaussian 7 | from utils.model import get_grid, ChannelPool, Flatten, NNBase 8 | import envs.utils.depth_utils as du 9 | 10 | 11 | class Goal_Oriented_Semantic_Policy(NNBase): 12 | 13 | def __init__(self, input_shape, recurrent=False, hidden_size=512, 14 | num_sem_categories=16): 15 | super(Goal_Oriented_Semantic_Policy, 
self).__init__( 16 | recurrent, hidden_size, hidden_size) 17 | 18 | out_size = int(input_shape[1] / 16.) * int(input_shape[2] / 16.) 19 | 20 | self.main = nn.Sequential( 21 | nn.MaxPool2d(2), 22 | nn.Conv2d(num_sem_categories + 8, 32, 3, stride=1, padding=1), 23 | nn.ReLU(), 24 | nn.MaxPool2d(2), 25 | nn.Conv2d(32, 64, 3, stride=1, padding=1), 26 | nn.ReLU(), 27 | nn.MaxPool2d(2), 28 | nn.Conv2d(64, 128, 3, stride=1, padding=1), 29 | nn.ReLU(), 30 | nn.MaxPool2d(2), 31 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 32 | nn.ReLU(), 33 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 34 | nn.ReLU(), 35 | Flatten() 36 | ) 37 | 38 | self.linear1 = nn.Linear(out_size * 32 + 8 * 2, hidden_size) 39 | self.linear2 = nn.Linear(hidden_size, 256) 40 | self.critic_linear = nn.Linear(256, 1) 41 | self.orientation_emb = nn.Embedding(72, 8) 42 | self.goal_emb = nn.Embedding(num_sem_categories, 8) 43 | self.train() 44 | 45 | def forward(self, inputs, rnn_hxs, masks, extras): 46 | x = self.main(inputs) 47 | orientation_emb = self.orientation_emb(extras[:, 0]) 48 | goal_emb = self.goal_emb(extras[:, 1]) 49 | 50 | x = torch.cat((x, orientation_emb, goal_emb), 1) 51 | 52 | x = nn.ReLU()(self.linear1(x)) 53 | if self.is_recurrent: 54 | x, rnn_hxs = self._forward_gru(x, rnn_hxs, masks) 55 | 56 | x = nn.ReLU()(self.linear2(x)) 57 | 58 | return self.critic_linear(x).squeeze(-1), x, rnn_hxs 59 | 60 | 61 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L15 62 | class RL_Policy(nn.Module): 63 | 64 | def __init__(self, obs_shape, action_space, model_type=0, 65 | base_kwargs=None): 66 | 67 | super(RL_Policy, self).__init__() 68 | if base_kwargs is None: 69 | base_kwargs = {} 70 | 71 | if model_type == 1: 72 | self.network = Goal_Oriented_Semantic_Policy( 73 | obs_shape, **base_kwargs) 74 | else: 75 | raise NotImplementedError 76 | 77 | if action_space.__class__.__name__ == "Discrete": 78 | num_outputs = action_space.n 79 | self.dist = Categorical(self.network.output_size, num_outputs) 80 | elif action_space.__class__.__name__ == "Box": 81 | num_outputs = action_space.shape[0] 82 | self.dist = DiagGaussian(self.network.output_size, num_outputs) 83 | else: 84 | raise NotImplementedError 85 | 86 | self.model_type = model_type 87 | 88 | @property 89 | def is_recurrent(self): 90 | return self.network.is_recurrent 91 | 92 | @property 93 | def rec_state_size(self): 94 | """Size of rnn_hx.""" 95 | return self.network.rec_state_size 96 | 97 | def forward(self, inputs, rnn_hxs, masks, extras): 98 | if extras is None: 99 | return self.network(inputs, rnn_hxs, masks) 100 | else: 101 | return self.network(inputs, rnn_hxs, masks, extras) 102 | 103 | def act(self, inputs, rnn_hxs, masks, extras=None, deterministic=False): 104 | 105 | value, actor_features, rnn_hxs = self(inputs, rnn_hxs, masks, extras) 106 | dist = self.dist(actor_features) 107 | 108 | if deterministic: 109 | action = dist.mode() 110 | else: 111 | action = dist.sample() 112 | 113 | action_log_probs = dist.log_probs(action) 114 | 115 | return value, action, action_log_probs, rnn_hxs 116 | 117 | def get_value(self, inputs, rnn_hxs, masks, extras=None): 118 | value, _, _ = self(inputs, rnn_hxs, masks, extras) 119 | return value 120 | 121 | def evaluate_actions(self, inputs, rnn_hxs, masks, action, extras=None): 122 | 123 | value, actor_features, rnn_hxs = self(inputs, rnn_hxs, masks, extras) 124 | dist = self.dist(actor_features) 125 | 126 | action_log_probs = dist.log_probs(action) 127 | dist_entropy = dist.entropy().mean() 
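        # evaluate_actions is what algo.PPO calls during an update: the
        # re-evaluated log-probs are compared with the stored ones to build
        # the clipped surrogate loss, roughly (a sketch of the standard
        # clipped PPO objective, not code from this file):
        #
        #     ratio = torch.exp(new_log_probs - old_log_probs)
        #     surr1 = ratio * adv
        #     surr2 = torch.clamp(ratio, 1 - clip_param, 1 + clip_param) * adv
        #     action_loss = -torch.min(surr1, surr2).mean()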
128 | 129 | return value, action_log_probs, dist_entropy, rnn_hxs 130 | 131 | 132 | class Semantic_Mapping(nn.Module): 133 | 134 | """ 135 | Semantic_Mapping 136 | """ 137 | 138 | def __init__(self, args): 139 | super(Semantic_Mapping, self).__init__() 140 | 141 | self.device = args.device 142 | self.screen_h = args.frame_height 143 | self.screen_w = args.frame_width 144 | self.resolution = args.map_resolution 145 | self.z_resolution = args.map_resolution 146 | self.map_size_cm = args.map_size_cm // args.global_downscaling 147 | self.n_channels = 3 148 | self.vision_range = args.vision_range 149 | self.dropout = 0.5 150 | self.fov = args.hfov 151 | self.du_scale = args.du_scale 152 | self.cat_pred_threshold = args.cat_pred_threshold 153 | self.exp_pred_threshold = args.exp_pred_threshold 154 | self.map_pred_threshold = args.map_pred_threshold 155 | self.num_sem_categories = args.num_sem_categories 156 | 157 | self.max_height = int(360 / self.z_resolution) 158 | self.min_height = int(-40 / self.z_resolution) 159 | self.agent_height = args.camera_height * 100. 160 | self.shift_loc = [self.vision_range * 161 | self.resolution // 2, 0, np.pi / 2.0] 162 | self.camera_matrix = du.get_camera_matrix( 163 | self.screen_w, self.screen_h, self.fov) 164 | 165 | self.pool = ChannelPool(1) 166 | 167 | vr = self.vision_range 168 | 169 | self.init_grid = torch.zeros( 170 | args.num_processes, 1 + self.num_sem_categories, vr, vr, 171 | self.max_height - self.min_height 172 | ).float().to(self.device) 173 | self.feat = torch.ones( 174 | args.num_processes, 1 + self.num_sem_categories, 175 | self.screen_h // self.du_scale * self.screen_w // self.du_scale 176 | ).float().to(self.device) 177 | 178 | def forward(self, obs, pose_obs, maps_last, poses_last): 179 | bs, c, h, w = obs.size() 180 | depth = obs[:, 3, :, :] 181 | 182 | point_cloud_t = du.get_point_cloud_from_z_t( 183 | depth, self.camera_matrix, self.device, scale=self.du_scale) 184 | 185 | agent_view_t = du.transform_camera_view_t( 186 | point_cloud_t, self.agent_height, 0, self.device) 187 | 188 | agent_view_centered_t = du.transform_pose_t( 189 | agent_view_t, self.shift_loc, self.device) 190 | 191 | max_h = self.max_height 192 | min_h = self.min_height 193 | xy_resolution = self.resolution 194 | z_resolution = self.z_resolution 195 | vision_range = self.vision_range 196 | XYZ_cm_std = agent_view_centered_t.float() 197 | XYZ_cm_std[..., :2] = (XYZ_cm_std[..., :2] / xy_resolution) 198 | XYZ_cm_std[..., :2] = (XYZ_cm_std[..., :2] - 199 | vision_range // 2.) / vision_range * 2. 200 | XYZ_cm_std[..., 2] = XYZ_cm_std[..., 2] / z_resolution 201 | XYZ_cm_std[..., 2] = (XYZ_cm_std[..., 2] - 202 | (max_h + min_h) // 2.) / (max_h - min_h) * 2. 
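        # XYZ_cm_std is now in the [-1, 1] convention splat_feat_nd expects:
        # along x/y, a point at d cm maps to
        # 2 * (d / resolution - vision_range // 2) / vision_range.
        # E.g., assuming the default vision_range=100 and resolution=5,
        # d = 250 cm gives cell 50 and a normalized coordinate of 0.0,
        # the center of the splat grid along that axis.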
203 | self.feat[:, 1:, :] = nn.AvgPool2d(self.du_scale)( 204 | obs[:, 4:, :, :] 205 | ).view(bs, c - 4, h // self.du_scale * w // self.du_scale) 206 | 207 | XYZ_cm_std = XYZ_cm_std.permute(0, 3, 1, 2) 208 | XYZ_cm_std = XYZ_cm_std.view(XYZ_cm_std.shape[0], 209 | XYZ_cm_std.shape[1], 210 | XYZ_cm_std.shape[2] * XYZ_cm_std.shape[3]) 211 | 212 | voxels = du.splat_feat_nd( 213 | self.init_grid * 0., self.feat, XYZ_cm_std).transpose(2, 3) 214 | 215 | min_z = int(25 / z_resolution - min_h) 216 | max_z = int((self.agent_height + 1) / z_resolution - min_h) 217 | 218 | agent_height_proj = voxels[..., min_z:max_z].sum(4) 219 | all_height_proj = voxels.sum(4) 220 | 221 | fp_map_pred = agent_height_proj[:, 0:1, :, :] 222 | fp_exp_pred = all_height_proj[:, 0:1, :, :] 223 | fp_map_pred = fp_map_pred / self.map_pred_threshold 224 | fp_exp_pred = fp_exp_pred / self.exp_pred_threshold 225 | fp_map_pred = torch.clamp(fp_map_pred, min=0.0, max=1.0) 226 | fp_exp_pred = torch.clamp(fp_exp_pred, min=0.0, max=1.0) 227 | 228 | pose_pred = poses_last 229 | 230 | agent_view = torch.zeros(bs, c, 231 | self.map_size_cm // self.resolution, 232 | self.map_size_cm // self.resolution 233 | ).to(self.device) 234 | 235 | x1 = self.map_size_cm // (self.resolution * 2) - self.vision_range // 2 236 | x2 = x1 + self.vision_range 237 | y1 = self.map_size_cm // (self.resolution * 2) 238 | y2 = y1 + self.vision_range 239 | agent_view[:, 0:1, y1:y2, x1:x2] = fp_map_pred 240 | agent_view[:, 1:2, y1:y2, x1:x2] = fp_exp_pred 241 | agent_view[:, 4:, y1:y2, x1:x2] = torch.clamp( 242 | agent_height_proj[:, 1:, :, :] / self.cat_pred_threshold, 243 | min=0.0, max=1.0) 244 | 245 | corrected_pose = pose_obs 246 | 247 | def get_new_pose_batch(pose, rel_pose_change): 248 | 249 | pose[:, 1] += rel_pose_change[:, 0] * \ 250 | torch.sin(pose[:, 2] / 57.29577951308232) \ 251 | + rel_pose_change[:, 1] * \ 252 | torch.cos(pose[:, 2] / 57.29577951308232) 253 | pose[:, 0] += rel_pose_change[:, 0] * \ 254 | torch.cos(pose[:, 2] / 57.29577951308232) \ 255 | - rel_pose_change[:, 1] * \ 256 | torch.sin(pose[:, 2] / 57.29577951308232) 257 | pose[:, 2] += rel_pose_change[:, 2] * 57.29577951308232 258 | 259 | pose[:, 2] = torch.fmod(pose[:, 2] - 180.0, 360.0) + 180.0 260 | pose[:, 2] = torch.fmod(pose[:, 2] + 180.0, 360.0) - 180.0 261 | 262 | return pose 263 | 264 | current_poses = get_new_pose_batch(poses_last, corrected_pose) 265 | st_pose = current_poses.clone().detach() 266 | 267 | st_pose[:, :2] = - (st_pose[:, :2] 268 | * 100.0 / self.resolution 269 | - self.map_size_cm // (self.resolution * 2)) /\ 270 | (self.map_size_cm // (self.resolution * 2)) 271 | st_pose[:, 2] = 90. 
- (st_pose[:, 2]) 272 | 273 | rot_mat, trans_mat = get_grid(st_pose, agent_view.size(), 274 | self.device) 275 | 276 | rotated = F.grid_sample(agent_view, rot_mat, align_corners=True) 277 | translated = F.grid_sample(rotated, trans_mat, align_corners=True) 278 | 279 | maps2 = torch.cat((maps_last.unsqueeze(1), translated.unsqueeze(1)), 1) 280 | 281 | map_pred, _ = torch.max(maps2, 1) 282 | 283 | return fp_map_pred, map_pred, pose_pred, current_poses 284 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-fmm==2019.1.30 2 | scikit-learn==0.22.2.post1 3 | scikit-image==0.15.0 4 | numpy>=1.20.2 5 | ifcfg 6 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | 5 | from envs import make_vec_envs 6 | from arguments import get_args 7 | 8 | os.environ["OMP_NUM_THREADS"] = "1" 9 | 10 | args = get_args() 11 | 12 | np.random.seed(args.seed) 13 | torch.manual_seed(args.seed) 14 | 15 | if args.cuda: 16 | torch.cuda.manual_seed(args.seed) 17 | 18 | 19 | def main(): 20 | num_episodes = int(args.num_eval_episodes) 21 | args.device = torch.device("cuda:0" if args.cuda else "cpu") 22 | 23 | torch.set_num_threads(1) 24 | envs = make_vec_envs(args) 25 | obs, infos = envs.reset() 26 | 27 | for ep_num in range(num_episodes): 28 | for step in range(args.max_episode_length): 29 | action = torch.randint(0, 3, (args.num_processes,)) 30 | obs, rew, done, infos = envs.step(action) 31 | 32 | if done: 33 | break 34 | 35 | print("Test successfully completed") 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /utils/distributions.py: -------------------------------------------------------------------------------- 1 | # The following code is largely borrowed from: 2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/distributions.py 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from utils.model import AddBias 8 | 9 | """ 10 | Modify standard PyTorch distributions so they are compatible with this code. 
11 | """ 12 | 13 | FixedCategorical = torch.distributions.Categorical 14 | 15 | old_sample = FixedCategorical.sample 16 | FixedCategorical.sample = lambda self: old_sample(self) 17 | 18 | log_prob_cat = FixedCategorical.log_prob 19 | FixedCategorical.log_probs = lambda self, actions: \ 20 | log_prob_cat(self, actions.squeeze(-1)) 21 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True) 22 | 23 | FixedNormal = torch.distributions.Normal 24 | log_prob_normal = FixedNormal.log_prob 25 | FixedNormal.log_probs = lambda self, actions: \ 26 | log_prob_normal(self, actions).sum(-1, keepdim=False) 27 | 28 | entropy = FixedNormal.entropy 29 | FixedNormal.entropy = lambda self: entropy(self).sum(-1) 30 | 31 | FixedNormal.mode = lambda self: self.mean 32 | 33 | 34 | class Categorical(nn.Module): 35 | 36 | def __init__(self, num_inputs, num_outputs): 37 | super(Categorical, self).__init__() 38 | self.linear = nn.Linear(num_inputs, num_outputs) 39 | 40 | def forward(self, x): 41 | x = self.linear(x) 42 | return FixedCategorical(logits=x) 43 | 44 | 45 | class DiagGaussian(nn.Module): 46 | 47 | def __init__(self, num_inputs, num_outputs): 48 | super(DiagGaussian, self).__init__() 49 | 50 | self.fc_mean = nn.Linear(num_inputs, num_outputs) 51 | self.logstd = AddBias(torch.zeros(num_outputs)) 52 | 53 | def forward(self, x): 54 | action_mean = self.fc_mean(x) 55 | 56 | zeros = torch.zeros(action_mean.size()) 57 | if x.is_cuda: 58 | zeros = zeros.cuda() 59 | 60 | action_logstd = self.logstd(zeros) 61 | return FixedNormal(action_mean, action_logstd.exp()) 62 | -------------------------------------------------------------------------------- /utils/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | 7 | def get_grid(pose, grid_size, device): 8 | """ 9 | Input: 10 | `pose` FloatTensor(bs, 3) 11 | `grid_size` 4-tuple (bs, _, grid_h, grid_w) 12 | `device` torch.device (cpu or gpu) 13 | Output: 14 | `rot_grid` FloatTensor(bs, grid_h, grid_w, 2) 15 | `trans_grid` FloatTensor(bs, grid_h, grid_w, 2) 16 | 17 | """ 18 | pose = pose.float() 19 | x = pose[:, 0] 20 | y = pose[:, 1] 21 | t = pose[:, 2] 22 | 23 | bs = x.size(0) 24 | t = t * np.pi / 180. 
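    # get_grid builds two flow fields for F.grid_sample: theta1 below is the
    # pure rotation [[cos t, -sin t, 0], [sin t, cos t, 0]] and theta2 the
    # pure translation [[1, 0, x], [0, 1, y]]. Semantic_Mapping in model.py
    # applies them in sequence to register the egocentric map into the
    # geocentric one:
    #
    #     rot_grid, trans_grid = get_grid(st_pose, agent_view.size(), device)
    #     rotated = F.grid_sample(agent_view, rot_grid, align_corners=True)
    #     translated = F.grid_sample(rotated, trans_grid, align_corners=True)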
25 | cos_t = t.cos() 26 | sin_t = t.sin() 27 | 28 | theta11 = torch.stack([cos_t, -sin_t, 29 | torch.zeros(cos_t.shape).float().to(device)], 1) 30 | theta12 = torch.stack([sin_t, cos_t, 31 | torch.zeros(cos_t.shape).float().to(device)], 1) 32 | theta1 = torch.stack([theta11, theta12], 1) 33 | 34 | theta21 = torch.stack([torch.ones(x.shape).to(device), 35 | -torch.zeros(x.shape).to(device), x], 1) 36 | theta22 = torch.stack([torch.zeros(x.shape).to(device), 37 | torch.ones(x.shape).to(device), y], 1) 38 | theta2 = torch.stack([theta21, theta22], 1) 39 | 40 | rot_grid = F.affine_grid(theta1, torch.Size(grid_size)) 41 | trans_grid = F.affine_grid(theta2, torch.Size(grid_size)) 42 | 43 | return rot_grid, trans_grid 44 | 45 | 46 | class ChannelPool(nn.MaxPool1d): 47 | def forward(self, x): 48 | n, c, w, h = x.size() 49 | x = x.view(n, c, w * h).permute(0, 2, 1) 50 | x = x.contiguous() 51 | pooled = F.max_pool1d(x, c, 1) 52 | _, _, c = pooled.size() 53 | pooled = pooled.permute(0, 2, 1) 54 | return pooled.view(n, c, w, h) 55 | 56 | 57 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/utils.py#L32 58 | class AddBias(nn.Module): 59 | def __init__(self, bias): 60 | super(AddBias, self).__init__() 61 | self._bias = nn.Parameter(bias.unsqueeze(1)) 62 | 63 | def forward(self, x): 64 | if x.dim() == 2: 65 | bias = self._bias.t().view(1, -1) 66 | else: 67 | bias = self._bias.t().view(1, -1, 1, 1) 68 | 69 | return x + bias 70 | 71 | 72 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L10 73 | class Flatten(nn.Module): 74 | def forward(self, x): 75 | return x.view(x.size(0), -1) 76 | 77 | 78 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L82 79 | class NNBase(nn.Module): 80 | 81 | def __init__(self, recurrent, recurrent_input_size, hidden_size): 82 | 83 | super(NNBase, self).__init__() 84 | self._hidden_size = hidden_size 85 | self._recurrent = recurrent 86 | 87 | if recurrent: 88 | self.gru = nn.GRUCell(recurrent_input_size, hidden_size) 89 | nn.init.orthogonal_(self.gru.weight_ih.data) 90 | nn.init.orthogonal_(self.gru.weight_hh.data) 91 | self.gru.bias_ih.data.fill_(0) 92 | self.gru.bias_hh.data.fill_(0) 93 | 94 | @property 95 | def is_recurrent(self): 96 | return self._recurrent 97 | 98 | @property 99 | def rec_state_size(self): 100 | if self._recurrent: 101 | return self._hidden_size 102 | return 1 103 | 104 | @property 105 | def output_size(self): 106 | return self._hidden_size 107 | 108 | def _forward_gru(self, x, hxs, masks): 109 | if x.size(0) == hxs.size(0): 110 | x = hxs = self.gru(x, hxs * masks[:, None]) 111 | else: 112 | # x is a (T, N, -1) tensor that has been flatten to (T * N, -1) 113 | N = hxs.size(0) 114 | T = int(x.size(0) / N) 115 | 116 | # unflatten 117 | x = x.view(T, N, x.size(1)) 118 | 119 | # Same deal with masks 120 | masks = masks.view(T, N, 1) 121 | 122 | outputs = [] 123 | for i in range(T): 124 | hx = hxs = self.gru(x[i], hxs * masks[i]) 125 | outputs.append(hx) 126 | 127 | # x is a (T, N, -1) tensor 128 | x = torch.stack(outputs, dim=0) 129 | # flatten 130 | x = x.view(T * N, -1) 131 | 132 | return x, hxs 133 | -------------------------------------------------------------------------------- /utils/optimization.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import re 3 | 4 | from torch import optim 5 | 6 | 7 | def get_optimizer(parameters, s): 8 | """ 9 | Parse optimizer parameters. 
10 |     Input should be of the form:
11 |     - "sgd,lr=0.01"
12 |     - "adagrad,lr=0.1,lr_decay=0.05"
13 |     """
14 |     if "," in s:
15 |         method = s[:s.find(',')]
16 |         optim_params = {}
17 |         for x in s[s.find(',') + 1:].split(','):
18 |             split = x.split('=')
19 |             assert len(split) == 2
20 |             assert re.match(
21 |                 r"^[+-]?(\d+(\.\d*)?|\.\d+)$",
22 |                 split[1]) is not None
23 |             optim_params[split[0]] = float(split[1])
24 |     else:
25 |         method = s
26 |         optim_params = {}
27 | 
28 |     if method == 'adadelta':
29 |         optim_fn = optim.Adadelta
30 |     elif method == 'adagrad':
31 |         optim_fn = optim.Adagrad
32 |     elif method == 'adam':
33 |         optim_fn = optim.Adam
34 |         optim_params['betas'] = (optim_params.get('beta1', 0.5),
35 |                                  optim_params.get('beta2', 0.999))
36 |         optim_params.pop('beta1', None)
37 |         optim_params.pop('beta2', None)
38 |     elif method == 'adamax':
39 |         optim_fn = optim.Adamax
40 |     elif method == 'asgd':
41 |         optim_fn = optim.ASGD
42 |     elif method == 'rmsprop':
43 |         optim_fn = optim.RMSprop
44 |     elif method == 'rprop':
45 |         optim_fn = optim.Rprop
46 |     elif method == 'sgd':
47 |         optim_fn = optim.SGD
48 |         assert 'lr' in optim_params
49 |     else:
50 |         raise Exception('Unknown optimization method: "%s"' % method)
51 | 
52 |     # check that we give good parameters to the optimizer
53 |     # (getfullargspec: inspect.getargspec was removed in Python 3.11)
54 |     expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
55 |     assert expected_args[:2] == ['self', 'params']
56 |     if not all(k in expected_args[2:] for k in optim_params.keys()):
57 |         raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
58 |             str(expected_args[2:]), str(optim_params.keys())))
59 | 
60 |     return optim_fn(parameters, **optim_params)
--------------------------------------------------------------------------------
/utils/storage.py:
--------------------------------------------------------------------------------
1 | # The following code is largely borrowed from:
2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/storage.py
3 | 
4 | from collections import namedtuple
5 | 
6 | import numpy as np
7 | import torch
8 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
9 | 
10 | 
11 | def _flatten_helper(T, N, _tensor):
12 |     return _tensor.view(T * N, *_tensor.size()[2:])
13 | 
14 | 
15 | class RolloutStorage(object):
16 | 
17 |     def __init__(self, num_steps, num_processes, obs_shape, action_space,
18 |                  rec_state_size):
19 | 
20 |         if action_space.__class__.__name__ == 'Discrete':
21 |             self.n_actions = 1
22 |             action_type = torch.long
23 |         else:
24 |             self.n_actions = action_space.shape[0]
25 |             action_type = torch.float32
26 | 
27 |         self.obs = torch.zeros(num_steps + 1, num_processes, *obs_shape)
28 |         self.rec_states = torch.zeros(num_steps + 1, num_processes,
29 |                                       rec_state_size)
30 |         self.rewards = torch.zeros(num_steps, num_processes)
31 |         self.value_preds = torch.zeros(num_steps + 1, num_processes)
32 |         self.returns = torch.zeros(num_steps + 1, num_processes)
33 |         self.action_log_probs = torch.zeros(num_steps, num_processes)
34 |         self.actions = torch.zeros((num_steps, num_processes, self.n_actions),
35 |                                    dtype=action_type)
36 |         self.masks = torch.ones(num_steps + 1, num_processes)
37 | 
38 |         self.num_steps = num_steps
39 |         self.step = 0
40 |         self.has_extras = False
41 |         self.extras_size = None
42 | 
43 |     def to(self, device):
44 |         self.obs = self.obs.to(device)
45 |         self.rec_states = self.rec_states.to(device)
46 |         self.rewards = self.rewards.to(device)
47 |         self.value_preds = self.value_preds.to(device)
48 |         self.returns = self.returns.to(device)
49 |         self.action_log_probs = self.action_log_probs.to(device)
50 |         self.actions = self.actions.to(device)
51 |         self.masks = self.masks.to(device)
52 |         if self.has_extras:
53 |             self.extras = self.extras.to(device)
54 |         return self
55 | 
56 |     def insert(self, obs, rec_states, actions, action_log_probs, value_preds,
57 |                rewards, masks):
58 |         self.obs[self.step + 1].copy_(obs)
59 |         self.rec_states[self.step + 1].copy_(rec_states)
60 |         self.actions[self.step].copy_(actions.view(-1, self.n_actions))
61 |         self.action_log_probs[self.step].copy_(action_log_probs)
62 |         self.value_preds[self.step].copy_(value_preds)
63 |         self.rewards[self.step].copy_(rewards)
64 |         self.masks[self.step + 1].copy_(masks)
65 | 
66 |         self.step = (self.step + 1) % self.num_steps
67 | 
68 |     def after_update(self):
69 |         self.obs[0].copy_(self.obs[-1])
70 |         self.rec_states[0].copy_(self.rec_states[-1])
71 |         self.masks[0].copy_(self.masks[-1])
72 |         if self.has_extras:
73 |             self.extras[0].copy_(self.extras[-1])
74 | 
75 |     def compute_returns(self, next_value, use_gae, gamma, tau):
76 |         if use_gae:
77 |             self.value_preds[-1] = next_value
78 |             gae = 0
79 |             for step in reversed(range(self.rewards.size(0))):
80 |                 delta = self.rewards[step] + gamma \
81 |                     * self.value_preds[step + 1] * self.masks[step + 1] \
82 |                     - self.value_preds[step]
83 |                 gae = delta + gamma * tau * self.masks[step + 1] * gae
84 |                 self.returns[step] = gae + self.value_preds[step]
85 |         else:
86 |             self.returns[-1] = next_value
87 |             for step in reversed(range(self.rewards.size(0))):
88 |                 self.returns[step] = self.returns[step + 1] * gamma \
89 |                     * self.masks[step + 1] + self.rewards[step]
90 | 
91 |     def feed_forward_generator(self, advantages, num_mini_batch):
92 | 
93 |         num_steps, num_processes = self.rewards.size()[0:2]
94 |         batch_size = num_processes * num_steps
95 |         mini_batch_size = batch_size // num_mini_batch
96 |         assert batch_size >= num_mini_batch, (
97 |             "PPO requires the number of processes ({}) "
98 |             "* number of steps ({}) = {} "
99 |             "to be greater than or equal to "
100 |             "the number of PPO mini batches ({})."
101 | "".format(num_processes, num_steps, num_processes * num_steps, 102 | num_mini_batch)) 103 | 104 | sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), 105 | mini_batch_size, drop_last=False) 106 | 107 | for indices in sampler: 108 | yield { 109 | 'obs': self.obs[:-1].view(-1, *self.obs.size()[2:])[indices], 110 | 'rec_states': self.rec_states[:-1].view( 111 | -1, self.rec_states.size(-1))[indices], 112 | 'actions': self.actions.view(-1, self.n_actions)[indices], 113 | 'value_preds': self.value_preds[:-1].view(-1)[indices], 114 | 'returns': self.returns[:-1].view(-1)[indices], 115 | 'masks': self.masks[:-1].view(-1)[indices], 116 | 'old_action_log_probs': self.action_log_probs.view(-1)[indices], 117 | 'adv_targ': advantages.view(-1)[indices], 118 | 'extras': self.extras[:-1].view( 119 | -1, self.extras_size)[indices] 120 | if self.has_extras else None, 121 | } 122 | 123 | def recurrent_generator(self, advantages, num_mini_batch): 124 | 125 | num_processes = self.rewards.size(1) 126 | assert num_processes >= num_mini_batch, ( 127 | "PPO requires the number of processes ({}) " 128 | "to be greater than or equal to the number of " 129 | "PPO mini batches ({}).".format(num_processes, num_mini_batch)) 130 | num_envs_per_batch = num_processes // num_mini_batch 131 | perm = torch.randperm(num_processes) 132 | T, N = self.num_steps, num_envs_per_batch 133 | 134 | for start_ind in range(0, num_processes, num_envs_per_batch): 135 | 136 | obs = [] 137 | rec_states = [] 138 | actions = [] 139 | value_preds = [] 140 | returns = [] 141 | masks = [] 142 | old_action_log_probs = [] 143 | adv_targ = [] 144 | if self.has_extras: 145 | extras = [] 146 | 147 | for offset in range(num_envs_per_batch): 148 | 149 | ind = perm[start_ind + offset] 150 | obs.append(self.obs[:-1, ind]) 151 | rec_states.append(self.rec_states[0:1, ind]) 152 | actions.append(self.actions[:, ind]) 153 | value_preds.append(self.value_preds[:-1, ind]) 154 | returns.append(self.returns[:-1, ind]) 155 | masks.append(self.masks[:-1, ind]) 156 | old_action_log_probs.append(self.action_log_probs[:, ind]) 157 | adv_targ.append(advantages[:, ind]) 158 | if self.has_extras: 159 | extras.append(self.extras[:-1, ind]) 160 | 161 | # These are all tensors of size (T, N, ...) 
162 | obs = torch.stack(obs, 1) 163 | actions = torch.stack(actions, 1) 164 | value_preds = torch.stack(value_preds, 1) 165 | returns = torch.stack(returns, 1) 166 | masks = torch.stack(masks, 1) 167 | old_action_log_probs = torch.stack(old_action_log_probs, 1) 168 | adv_targ = torch.stack(adv_targ, 1) 169 | if self.has_extras: 170 | extras = torch.stack(extras, 1) 171 | 172 | yield { 173 | 'obs': _flatten_helper(T, N, obs), 174 | 'actions': _flatten_helper(T, N, actions), 175 | 'value_preds': _flatten_helper(T, N, value_preds), 176 | 'returns': _flatten_helper(T, N, returns), 177 | 'masks': _flatten_helper(T, N, masks), 178 | 'old_action_log_probs': _flatten_helper( 179 | T, N, old_action_log_probs), 180 | 'adv_targ': _flatten_helper(T, N, adv_targ), 181 | 'extras': _flatten_helper( 182 | T, N, extras) if self.has_extras else None, 183 | 'rec_states': torch.stack(rec_states, 1).view(N, -1), 184 | } 185 | 186 | 187 | class GlobalRolloutStorage(RolloutStorage): 188 | 189 | def __init__(self, num_steps, num_processes, obs_shape, action_space, 190 | rec_state_size, extras_size): 191 | super(GlobalRolloutStorage, self).__init__( 192 | num_steps, num_processes, obs_shape, action_space, rec_state_size) 193 | self.extras = torch.zeros((num_steps + 1, num_processes, extras_size), 194 | dtype=torch.long) 195 | self.has_extras = True 196 | self.extras_size = extras_size 197 | 198 | def insert(self, obs, rec_states, actions, action_log_probs, value_preds, 199 | rewards, masks, extras): 200 | self.extras[self.step + 1].copy_(extras) 201 | super(GlobalRolloutStorage, self).insert( 202 | obs, rec_states, actions, 203 | action_log_probs, value_preds, rewards, masks) 204 | --------------------------------------------------------------------------------
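For illustration only — a minimal sketch of how the storage classes above are
typically driven (placeholder shapes, a stand-in `Discrete` action space, and
dummy policy/environment outputs; this is not code from the repo, and it
assumes the repo root is on PYTHONPATH):

import torch

from utils.storage import GlobalRolloutStorage


class Discrete:  # minimal stand-in; RolloutStorage only inspects the class name
    pass


num_steps, num_processes = 8, 2
obs_shape, rec_size, extras_size = (4, 16, 16), 32, 1

rollouts = GlobalRolloutStorage(num_steps, num_processes, obs_shape,
                                Discrete(), rec_size, extras_size)
rollouts.to(torch.device("cpu"))

for _ in range(num_steps):
    # A real loop would query the actor-critic and the vectorized envs here.
    obs = torch.zeros(num_processes, *obs_shape)
    rec = torch.zeros(num_processes, rec_size)
    actions = torch.zeros(num_processes, 1, dtype=torch.long)
    log_probs = torch.zeros(num_processes)
    values = torch.zeros(num_processes)
    rewards = torch.zeros(num_processes)
    masks = torch.ones(num_processes)  # 0.0 where an episode just ended
    extras = torch.zeros(num_processes, extras_size, dtype=torch.long)
    rollouts.insert(obs, rec, actions, log_probs, values, rewards, masks,
                    extras)

# GAE, as implemented in compute_returns:
#   delta_t = r_t + gamma * V_{t+1} * mask_{t+1} - V_t
#   A_t     = delta_t + gamma * tau * mask_{t+1} * A_{t+1}
next_value = torch.zeros(num_processes)
rollouts.compute_returns(next_value, use_gae=True, gamma=0.99, tau=0.95)

advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1]
for batch in rollouts.feed_forward_generator(advantages, num_mini_batch=2):
    pass  # batch['obs'], batch['adv_targ'], ... feed one PPO update step

rollouts.after_update()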