├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── agents ├── sem_exp.py └── utils │ ├── semantic_prediction.py │ └── visualization.py ├── algo ├── __init__.py └── ppo.py ├── arguments.py ├── configs ├── Base-RCNN-FPN.yaml └── COCO-InstanceSegmentation │ └── mask_rcnn_R_50_FPN_3x.yaml ├── constants.py ├── docs ├── DOCKER_INSTRUCTIONS.md ├── INSTRUCTIONS.md ├── example.gif ├── legend.png └── overview.jpg ├── envs ├── __init__.py ├── habitat │ ├── __init__.py │ ├── configs │ │ └── tasks │ │ │ └── objectnav_gibson.yaml │ ├── objectgoal_env.py │ └── utils │ │ └── vector_env.py └── utils │ ├── depth_utils.py │ ├── fmm_planner.py │ ├── map_builder.py │ ├── pose.py │ └── rotation_utils.py ├── main.py ├── model.py ├── requirements.txt ├── test.py └── utils ├── distributions.py ├── model.py ├── optimization.py └── storage.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # vim 132 | *.swo 133 | *.swp 134 | 135 | .idea/ 136 | .vscode/ 137 | *.DS_Store 138 | 139 | # Data and log folders 140 | saved/ 141 | tmp/ 142 | pretrained_models/ 143 | data/ 144 | data 145 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Base image 2 | FROM nvidia/cudagl:10.1-devel-ubuntu16.04 3 | 4 | # Setup basic packages 5 | RUN apt-get update && apt-get install -y --no-install-recommends \ 6 | build-essential \ 7 | git \ 8 | curl \ 9 | vim \ 10 | ca-certificates \ 11 | libjpeg-dev \ 12 | libpng-dev \ 13 | libglfw3-dev \ 14 | libglm-dev \ 15 | libx11-dev \ 16 | libomp-dev \ 17 | libegl1-mesa-dev \ 18 | pkg-config \ 19 | wget \ 20 | zip \ 21 | htop \ 22 | tmux \ 23 | unzip &&\ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | # Install conda 27 | RUN wget -O $HOME/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh &&\ 28 | chmod +x ~/miniconda.sh &&\ 29 | ~/miniconda.sh -b -p /custom/conda &&\ 30 | rm ~/miniconda.sh &&\ 31 | /custom/conda/bin/conda install numpy pyyaml scipy ipython mkl mkl-include &&\ 32 | /custom/conda/bin/conda clean -ya 33 | ENV PATH /custom/conda/bin:$PATH 34 | 35 | # Install cmake 36 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.14.0/cmake-3.14.0-Linux-x86_64.sh 37 | RUN mkdir /opt/cmake 38 | RUN sh /cmake-3.14.0-Linux-x86_64.sh --prefix=/opt/cmake --skip-license 39 | RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake 40 | RUN cmake --version 41 | 42 | # Setup habitat-sim 43 | RUN git clone https://github.com/facebookresearch/habitat-sim.git 44 | RUN /bin/bash -c "cd habitat-sim; git checkout tags/v0.1.5; pip install -r requirements.txt; python setup.py install --headless --with-cuda" 45 | 46 | # Install challenge specific habitat-api 47 | RUN git clone https://github.com/facebookresearch/habitat-api.git 48 | RUN /bin/bash -c "cd habitat-api; git checkout tags/v0.1.5; pip install -e ." 
49 | RUN /bin/bash -c "cd habitat-api; wget http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip; unzip habitat-test-scenes.zip" 50 | 51 | # Silence habitat-sim logs 52 | ENV GLOG_minloglevel=2 53 | ENV MAGNUM_LOG="quiet" 54 | 55 | # Install project specific packages 56 | RUN /bin/bash -c "apt-get update; apt-get install -y libsm6 libxext6 libxrender-dev; pip install opencv-python" 57 | RUN /bin/bash -c "pip install --upgrade cython numpy" 58 | RUN /bin/bash -c "pip install matplotlib seaborn==0.9.0 scikit-fmm==2019.1.30 scikit-image==0.15.0 imageio==2.6.0 scikit-learn==0.22.2.post1 ifcfg" 59 | 60 | # Install pytorch and torch_scatter 61 | RUN conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.2 -c pytorch 62 | RUN /bin/bash -c "pip install torch_scatter" 63 | 64 | # Install detectron2 65 | RUN /bin/bash -c "python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html" 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Devendra Chaplot 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object Goal Navigation using Goal-Oriented Semantic Exploration 2 | This is a PyTorch implementation of the NeurIPS-20 paper: 3 | 4 | [Object Goal Navigation using Goal-Oriented Semantic Exploration](https://arxiv.org/pdf/2007.00643.pdf)
5 | Devendra Singh Chaplot, Dhiraj Gandhi, Abhinav Gupta, Ruslan Salakhutdinov
6 | Carnegie Mellon University, Facebook AI Research
7 |
8 | Winner of the [CVPR 2020 Habitat ObjectNav Challenge](https://aihabitat.org/challenge/2020/).
9 |
10 | Project Website: https://devendrachaplot.github.io/projects/semantic-exploration
11 |
12 | ![example](./docs/example.gif)
13 |
14 | ### Overview:
15 | The Goal-Oriented Semantic Exploration (SemExp) model consists of three modules: a Semantic Mapping Module, a Goal-Oriented Semantic Policy, and a deterministic Local Policy.
16 | As shown below, the Semantic Mapping Module builds a semantic map over time. The Goal-Oriented Semantic Policy selects a long-term goal based on the semantic
17 | map to reach the given object goal efficiently. A deterministic Local Policy based on analytical planners is used to take low-level navigation actions to reach the long-term goal.
18 |
19 | ![overview](./docs/overview.jpg)
20 |
21 | ### This repository contains:
22 | - Train and test episode datasets for the [Object Goal Navigation](https://arxiv.org/pdf/2007.00643.pdf) task for the Gibson dataset in the Habitat Simulator.
23 | - The code to train and evaluate the Semantic Exploration (SemExp) model on the Object Goal Navigation task.
24 | - A pretrained SemExp model.
25 |
26 | ## Installing Dependencies
27 | - We use earlier versions of [habitat-sim](https://github.com/facebookresearch/habitat-sim) and [habitat-lab](https://github.com/facebookresearch/habitat-lab) as specified below:
28 |
29 | Installing habitat-sim:
30 | ```
31 | git clone https://github.com/facebookresearch/habitat-sim.git
32 | cd habitat-sim; git checkout tags/v0.1.5;
33 | pip install -r requirements.txt;
34 | python setup.py install --headless
35 | python setup.py install # (for Mac OS)
36 | ```
37 |
38 | Installing habitat-lab:
39 | ```
40 | git clone https://github.com/facebookresearch/habitat-lab.git
41 | cd habitat-lab; git checkout tags/v0.1.5;
42 | pip install -e .
43 | ```
44 | Check the habitat installation by running `python examples/benchmark.py` in the habitat-lab folder.
45 |
46 | - Install [pytorch](https://pytorch.org/) according to your system configuration. The code is tested on pytorch v1.6.0 and cudatoolkit v10.2. If you are using conda:
47 | ```
48 | conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch #(Linux with GPU)
49 | conda install pytorch==1.6.0 torchvision==0.7.0 -c pytorch #(Mac OS)
50 | ```
51 |
52 | - Install [detectron2](https://github.com/facebookresearch/detectron2/) according to your system configuration. If you are using conda:
53 | ```
54 | python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html #(Linux with GPU)
55 | CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' #(Mac OS)
56 | ```
57 |
58 | ### Docker and Singularity images:
59 | We provide experimental [docker](https://www.docker.com/) and [singularity](https://sylabs.io/) images with all the dependencies installed, see [Docker Instructions](./docs/DOCKER_INSTRUCTIONS.md).
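As an optional sanity check of the dependencies (this one-liner only imports the key packages; the expected output assumes the versions above):
```
python -c "import torch, detectron2, habitat; print(torch.__version__, torch.cuda.is_available())"
```
On Linux with a GPU this should print `1.6.0 True`.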
60 | 61 | 62 | ## Setup 63 | Clone the repository and install other requirements: 64 | ``` 65 | git clone https://github.com/devendrachaplot/Object-Goal-Navigation/ 66 | cd Object-Goal-Navigation/; 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | ### Downloading scene dataset 71 | - Download the Gibson dataset using the instructions here: https://github.com/facebookresearch/habitat-lab#scenes-datasets (download the 11GB file `gibson_habitat_trainval.zip`) 72 | - Move the Gibson scene dataset or create a symlink at `data/scene_datasets/gibson_semantic`. 73 | 74 | ### Downloading episode dataset 75 | - Download the episode dataset: 76 | ``` 77 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1tslnZAkH8m3V5nP8pbtBmaR2XEfr8Rau' -O objectnav_gibson_v1.1.zip 78 | ``` 79 | - Unzip the dataset into `data/datasets/objectnav/gibson/v1.1/` 80 | 81 | ### Setting up datasets 82 | The code requires the datasets in a `data` folder in the following format (same as habitat-lab): 83 | ``` 84 | Object-Goal-Navigation/ 85 | data/ 86 | scene_datasets/ 87 | gibson_semantic/ 88 | Adrian.glb 89 | Adrian.navmesh 90 | ... 91 | datasets/ 92 | objectnav/ 93 | gibson/ 94 | v1.1/ 95 | train/ 96 | val/ 97 | ``` 98 | 99 | 100 | ### Test setup 101 | To verify that the data is setup correctly, run: 102 | ``` 103 | python test.py --agent random -n1 --num_eval_episodes 1 --auto_gpu_config 0 104 | ``` 105 | 106 | ## Usage 107 | 108 | ### Training: 109 | For training the SemExp model on the Object Goal Navigation task: 110 | ``` 111 | python main.py 112 | ``` 113 | 114 | ### Downloading pre-trained models 115 | ``` 116 | mkdir pretrained_models; 117 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=171ZA7XNu5vi3XLpuKs8DuGGZrYyuSjL0' -O pretrained_models/sem_exp.pth 118 | ``` 119 | 120 | ### For evaluation: 121 | For evaluating the pre-trained model: 122 | ``` 123 | python main.py --split val --eval 1 --load pretrained_models/sem_exp.pth 124 | ``` 125 | 126 | For visualizing the agent observations and predicted semantic map, add `-v 1` as an argument to the above command. 127 | 128 | The pre-trained model should get 0.657 Success, 0.339 SPL and 1.474 DTG. 129 | 130 | For more detailed instructions, see [INSTRUCTIONS](./docs/INSTRUCTIONS.md). 131 | 132 | 133 | ## Cite as 134 | >Chaplot, D.S., Gandhi, D., Gupta, A. and Salakhutdinov, R., 2020. Object Goal Navigation using Goal-Oriented Semantic Exploration. In Neural Information Processing Systems (NeurIPS-20). ([PDF](https://arxiv.org/pdf/2007.00643.pdf)) 135 | 136 | ### Bibtex: 137 | ``` 138 | @inproceedings{chaplot2020object, 139 | title={Object Goal Navigation using Goal-Oriented Semantic Exploration}, 140 | author={Chaplot, Devendra Singh and Gandhi, Dhiraj and 141 | Gupta, Abhinav and Salakhutdinov, Ruslan}, 142 | booktitle={In Neural Information Processing Systems (NeurIPS)}, 143 | year={2020} 144 | } 145 | ``` 146 | 147 | ## Related Projects 148 | - This project builds on the [Active Neural SLAM](https://devendrachaplot.github.io/projects/Neural-SLAM) paper. The code and pretrained models for the Active Neural SLAM system are available at: 149 | https://github.com/devendrachaplot/Neural-SLAM. 150 | - The Semantic Mapping module is similar to the one used in [Semantic Curiosity](https://devendrachaplot.github.io/projects/SemanticCuriosity). 151 | 152 | ## Acknowledgements 153 | This repository uses [Habitat Lab](https://github.com/facebookresearch/habitat-lab) implementation for running the RL environment. 
154 | The implementation of PPO is borrowed from [ikostrikov/pytorch-a2c-ppo-acktr-gail](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/).
155 | The Mask-RCNN implementation is based on the [detectron2](https://github.com/facebookresearch/detectron2/) repository. We would also like to thank Shubham Tulsiani and Saurabh Gupta for their help in implementing some parts of the code.
156 |
-------------------------------------------------------------------------------- /agents/sem_exp.py: --------------------------------------------------------------------------------
1 | import math
2 | import os
3 | import cv2
4 | import numpy as np
5 | import skimage.morphology
6 | from PIL import Image
7 | from torchvision import transforms
8 |
9 | from envs.utils.fmm_planner import FMMPlanner
10 | from envs.habitat.objectgoal_env import ObjectGoal_Env
11 | from agents.utils.semantic_prediction import SemanticPredMaskRCNN
12 | from constants import color_palette
13 | import envs.utils.pose as pu
14 | import agents.utils.visualization as vu
15 |
16 |
17 | class Sem_Exp_Env_Agent(ObjectGoal_Env):
18 |     """The Sem_Exp environment agent class. A separate Sem_Exp_Env_Agent class
19 |     object is used for each environment thread.
20 |
21 |     """
22 |
23 |     def __init__(self, args, rank, config_env, dataset):
24 |
25 |         self.args = args
26 |         super().__init__(args, rank, config_env, dataset)
27 |
28 |         # initialize transform for RGB observations
29 |         self.res = transforms.Compose(
30 |             [transforms.ToPILImage(),
31 |              transforms.Resize((args.frame_height, args.frame_width),
32 |                                interpolation=Image.NEAREST)])
33 |
34 |         # initialize semantic segmentation prediction model
35 |         if args.sem_gpu_id == -1:
36 |             args.sem_gpu_id = config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID
37 |
38 |         self.sem_pred = SemanticPredMaskRCNN(args)
39 |
40 |         # initializations for planning:
41 |         self.selem = skimage.morphology.disk(3)
42 |
43 |         self.obs = None
44 |         self.obs_shape = None
45 |         self.collision_map = None
46 |         self.visited = None
47 |         self.visited_vis = None
48 |         self.col_width = None
49 |         self.curr_loc = None
50 |         self.last_loc = None
51 |         self.last_action = None
52 |         self.count_forward_actions = None
53 |
54 |         if args.visualize or args.print_images:
55 |             self.legend = cv2.imread('docs/legend.png')
56 |             self.vis_image = None
57 |             self.rgb_vis = None
58 |
59 |     def reset(self):
60 |         args = self.args
61 |
62 |         obs, info = super().reset()
63 |         obs = self._preprocess_obs(obs)
64 |
65 |         self.obs_shape = obs.shape
66 |
67 |         # Episode initializations
68 |         map_shape = (args.map_size_cm // args.map_resolution,
69 |                      args.map_size_cm // args.map_resolution)
70 |         self.collision_map = np.zeros(map_shape)
71 |         self.visited = np.zeros(map_shape)
72 |         self.visited_vis = np.zeros(map_shape)
73 |         self.col_width = 1
74 |         self.count_forward_actions = 0
75 |         self.curr_loc = [args.map_size_cm / 100.0 / 2.0,
76 |                          args.map_size_cm / 100.0 / 2.0, 0.]
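        # (curr_loc above places the agent at the center of the map,
        # i.e. map_size_cm / 100.0 / 2.0 meters in x and y, heading 0 degrees)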
77 | self.last_action = None 78 | 79 | if args.visualize or args.print_images: 80 | self.vis_image = vu.init_vis_image(self.goal_name, self.legend) 81 | 82 | return obs, info 83 | 84 | def plan_act_and_preprocess(self, planner_inputs): 85 | """Function responsible for planning, taking the action and 86 | preprocessing observations 87 | 88 | Args: 89 | planner_inputs (dict): 90 | dict with following keys: 91 | 'map_pred' (ndarray): (M, M) map prediction 92 | 'goal' (ndarray): (M, M) mat denoting goal locations 93 | 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o) 94 | and planning window (gx1, gx2, gy1, gy2) 95 | 'found_goal' (bool): whether the goal object is found 96 | 97 | Returns: 98 | obs (ndarray): preprocessed observations ((4+C) x H x W) 99 | reward (float): amount of reward returned after previous action 100 | done (bool): whether the episode has ended 101 | info (dict): contains timestep, pose, goal category and 102 | evaluation metric info 103 | """ 104 | 105 | # plan 106 | if planner_inputs["wait"]: 107 | self.last_action = None 108 | self.info["sensor_pose"] = [0., 0., 0.] 109 | return np.zeros(self.obs.shape), 0., False, self.info 110 | 111 | # Reset reward if new long-term goal 112 | if planner_inputs["new_goal"]: 113 | self.info["g_reward"] = 0 114 | 115 | action = self._plan(planner_inputs) 116 | 117 | if self.args.visualize or self.args.print_images: 118 | self._visualize(planner_inputs) 119 | 120 | if action >= 0: 121 | 122 | # act 123 | action = {'action': action} 124 | obs, rew, done, info = super().step(action) 125 | 126 | # preprocess obs 127 | obs = self._preprocess_obs(obs) 128 | self.last_action = action['action'] 129 | self.obs = obs 130 | self.info = info 131 | 132 | info['g_reward'] += rew 133 | 134 | return obs, rew, done, info 135 | 136 | else: 137 | self.last_action = None 138 | self.info["sensor_pose"] = [0., 0., 0.] 
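            # No action to execute: return a blank observation and a zero
            # sensor pose delta, leaving the episode state untouched
            # (mirrors the 'wait' branch at the top of this method).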
139 | return np.zeros(self.obs_shape), 0., False, self.info 140 | 141 | def _plan(self, planner_inputs): 142 | """Function responsible for planning 143 | 144 | Args: 145 | planner_inputs (dict): 146 | dict with following keys: 147 | 'map_pred' (ndarray): (M, M) map prediction 148 | 'goal' (ndarray): (M, M) goal locations 149 | 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o) 150 | and planning window (gx1, gx2, gy1, gy2) 151 | 'found_goal' (bool): whether the goal object is found 152 | 153 | Returns: 154 | action (int): action id 155 | """ 156 | args = self.args 157 | 158 | self.last_loc = self.curr_loc 159 | 160 | # Get Map prediction 161 | map_pred = np.rint(planner_inputs['map_pred']) 162 | goal = planner_inputs['goal'] 163 | 164 | # Get pose prediction and global policy planning window 165 | start_x, start_y, start_o, gx1, gx2, gy1, gy2 = \ 166 | planner_inputs['pose_pred'] 167 | gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2) 168 | planning_window = [gx1, gx2, gy1, gy2] 169 | 170 | # Get curr loc 171 | self.curr_loc = [start_x, start_y, start_o] 172 | r, c = start_y, start_x 173 | start = [int(r * 100.0 / args.map_resolution - gx1), 174 | int(c * 100.0 / args.map_resolution - gy1)] 175 | start = pu.threshold_poses(start, map_pred.shape) 176 | 177 | self.visited[gx1:gx2, gy1:gy2][start[0] - 0:start[0] + 1, 178 | start[1] - 0:start[1] + 1] = 1 179 | 180 | if args.visualize or args.print_images: 181 | # Get last loc 182 | last_start_x, last_start_y = self.last_loc[0], self.last_loc[1] 183 | r, c = last_start_y, last_start_x 184 | last_start = [int(r * 100.0 / args.map_resolution - gx1), 185 | int(c * 100.0 / args.map_resolution - gy1)] 186 | last_start = pu.threshold_poses(last_start, map_pred.shape) 187 | self.visited_vis[gx1:gx2, gy1:gy2] = \ 188 | vu.draw_line(last_start, start, 189 | self.visited_vis[gx1:gx2, gy1:gy2]) 190 | 191 | # Collision check 192 | if self.last_action == 1: 193 | x1, y1, t1 = self.last_loc 194 | x2, y2, _ = self.curr_loc 195 | buf = 4 196 | length = 2 197 | 198 | if abs(x1 - x2) < 0.05 and abs(y1 - y2) < 0.05: 199 | self.col_width += 2 200 | if self.col_width == 7: 201 | length = 4 202 | buf = 3 203 | self.col_width = min(self.col_width, 5) 204 | else: 205 | self.col_width = 1 206 | 207 | dist = pu.get_l2_distance(x1, x2, y1, y2) 208 | if dist < args.collision_threshold: # Collision 209 | width = self.col_width 210 | for i in range(length): 211 | for j in range(width): 212 | wx = x1 + 0.05 * \ 213 | ((i + buf) * np.cos(np.deg2rad(t1)) 214 | + (j - width // 2) * np.sin(np.deg2rad(t1))) 215 | wy = y1 + 0.05 * \ 216 | ((i + buf) * np.sin(np.deg2rad(t1)) 217 | - (j - width // 2) * np.cos(np.deg2rad(t1))) 218 | r, c = wy, wx 219 | r, c = int(r * 100 / args.map_resolution), \ 220 | int(c * 100 / args.map_resolution) 221 | [r, c] = pu.threshold_poses([r, c], 222 | self.collision_map.shape) 223 | self.collision_map[r, c] = 1 224 | 225 | stg, stop = self._get_stg(map_pred, start, np.copy(goal), 226 | planning_window) 227 | 228 | # Deterministic Local Policy 229 | if stop and planner_inputs['found_goal'] == 1: 230 | action = 0 # Stop 231 | else: 232 | (stg_x, stg_y) = stg 233 | angle_st_goal = math.degrees(math.atan2(stg_x - start[0], 234 | stg_y - start[1])) 235 | angle_agent = (start_o) % 360.0 236 | if angle_agent > 180: 237 | angle_agent -= 360 238 | 239 | relative_angle = (angle_agent - angle_st_goal) % 360.0 240 | if relative_angle > 180: 241 | relative_angle -= 360 242 | 243 | if relative_angle > self.args.turn_angle / 2.: 244 | action = 3 # 
Right 245 | elif relative_angle < -self.args.turn_angle / 2.: 246 | action = 2 # Left 247 | else: 248 | action = 1 # Forward 249 | 250 | return action 251 | 252 | def _get_stg(self, grid, start, goal, planning_window): 253 | """Get short-term goal""" 254 | 255 | [gx1, gx2, gy1, gy2] = planning_window 256 | 257 | x1, y1, = 0, 0 258 | x2, y2 = grid.shape 259 | 260 | def add_boundary(mat, value=1): 261 | h, w = mat.shape 262 | new_mat = np.zeros((h + 2, w + 2)) + value 263 | new_mat[1:h + 1, 1:w + 1] = mat 264 | return new_mat 265 | 266 | traversible = skimage.morphology.binary_dilation( 267 | grid[x1:x2, y1:y2], 268 | self.selem) != True 269 | traversible[self.collision_map[gx1:gx2, gy1:gy2] 270 | [x1:x2, y1:y2] == 1] = 0 271 | traversible[self.visited[gx1:gx2, gy1:gy2][x1:x2, y1:y2] == 1] = 1 272 | 273 | traversible[int(start[0] - x1) - 1:int(start[0] - x1) + 2, 274 | int(start[1] - y1) - 1:int(start[1] - y1) + 2] = 1 275 | 276 | traversible = add_boundary(traversible) 277 | goal = add_boundary(goal, value=0) 278 | 279 | planner = FMMPlanner(traversible) 280 | selem = skimage.morphology.disk(10) 281 | goal = skimage.morphology.binary_dilation( 282 | goal, selem) != True 283 | goal = 1 - goal * 1. 284 | planner.set_multi_goal(goal) 285 | 286 | state = [start[0] - x1 + 1, start[1] - y1 + 1] 287 | stg_x, stg_y, _, stop = planner.get_short_term_goal(state) 288 | 289 | stg_x, stg_y = stg_x + x1 - 1, stg_y + y1 - 1 290 | 291 | return (stg_x, stg_y), stop 292 | 293 | def _preprocess_obs(self, obs, use_seg=True): 294 | args = self.args 295 | obs = obs.transpose(1, 2, 0) 296 | rgb = obs[:, :, :3] 297 | depth = obs[:, :, 3:4] 298 | 299 | sem_seg_pred = self._get_sem_pred( 300 | rgb.astype(np.uint8), use_seg=use_seg) 301 | depth = self._preprocess_depth(depth, args.min_depth, args.max_depth) 302 | 303 | ds = args.env_frame_width // args.frame_width # Downscaling factor 304 | if ds != 1: 305 | rgb = np.asarray(self.res(rgb.astype(np.uint8))) 306 | depth = depth[ds // 2::ds, ds // 2::ds] 307 | sem_seg_pred = sem_seg_pred[ds // 2::ds, ds // 2::ds] 308 | 309 | depth = np.expand_dims(depth, axis=2) 310 | state = np.concatenate((rgb, depth, sem_seg_pred), 311 | axis=2).transpose(2, 0, 1) 312 | 313 | return state 314 | 315 | def _preprocess_depth(self, depth, min_d, max_d): 316 | depth = depth[:, :, 0] * 1 317 | 318 | for i in range(depth.shape[1]): 319 | depth[:, i][depth[:, i] == 0.] = depth[:, i].max() 320 | 321 | mask2 = depth > 0.99 322 | depth[mask2] = 0. 
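        # depth is still normalized to [0, 1] at this point: invalid zero
        # readings were filled with the per-column max above, near-saturated
        # readings (> 0.99) were just zeroed out, and the remaining zeros are
        # pushed far out of range below, before rescaling to centimeters.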
323 | 324 | mask1 = depth == 0 325 | depth[mask1] = 100.0 326 | depth = min_d * 100.0 + depth * max_d * 100.0 327 | return depth 328 | 329 | def _get_sem_pred(self, rgb, use_seg=True): 330 | if use_seg: 331 | semantic_pred, self.rgb_vis = self.sem_pred.get_prediction(rgb) 332 | semantic_pred = semantic_pred.astype(np.float32) 333 | else: 334 | semantic_pred = np.zeros((rgb.shape[0], rgb.shape[1], 16)) 335 | self.rgb_vis = rgb[:, :, ::-1] 336 | return semantic_pred 337 | 338 | def _visualize(self, inputs): 339 | args = self.args 340 | dump_dir = "{}/dump/{}/".format(args.dump_location, 341 | args.exp_name) 342 | ep_dir = '{}/episodes/thread_{}/eps_{}/'.format( 343 | dump_dir, self.rank, self.episode_no) 344 | if not os.path.exists(ep_dir): 345 | os.makedirs(ep_dir) 346 | 347 | map_pred = inputs['map_pred'] 348 | exp_pred = inputs['exp_pred'] 349 | start_x, start_y, start_o, gx1, gx2, gy1, gy2 = inputs['pose_pred'] 350 | 351 | goal = inputs['goal'] 352 | sem_map = inputs['sem_map_pred'] 353 | 354 | gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2) 355 | 356 | sem_map += 5 357 | 358 | no_cat_mask = sem_map == 20 359 | map_mask = np.rint(map_pred) == 1 360 | exp_mask = np.rint(exp_pred) == 1 361 | vis_mask = self.visited_vis[gx1:gx2, gy1:gy2] == 1 362 | 363 | sem_map[no_cat_mask] = 0 364 | m1 = np.logical_and(no_cat_mask, exp_mask) 365 | sem_map[m1] = 2 366 | 367 | m2 = np.logical_and(no_cat_mask, map_mask) 368 | sem_map[m2] = 1 369 | 370 | sem_map[vis_mask] = 3 371 | 372 | selem = skimage.morphology.disk(4) 373 | goal_mat = 1 - skimage.morphology.binary_dilation( 374 | goal, selem) != True 375 | 376 | goal_mask = goal_mat == 1 377 | sem_map[goal_mask] = 4 378 | 379 | color_pal = [int(x * 255.) for x in color_palette] 380 | sem_map_vis = Image.new("P", (sem_map.shape[1], 381 | sem_map.shape[0])) 382 | sem_map_vis.putpalette(color_pal) 383 | sem_map_vis.putdata(sem_map.flatten().astype(np.uint8)) 384 | sem_map_vis = sem_map_vis.convert("RGB") 385 | sem_map_vis = np.flipud(sem_map_vis) 386 | 387 | sem_map_vis = sem_map_vis[:, :, [2, 1, 0]] 388 | sem_map_vis = cv2.resize(sem_map_vis, (480, 480), 389 | interpolation=cv2.INTER_NEAREST) 390 | self.vis_image[50:530, 15:655] = self.rgb_vis 391 | self.vis_image[50:530, 670:1150] = sem_map_vis 392 | 393 | pos = ( 394 | (start_x * 100. / args.map_resolution - gy1) 395 | * 480 / map_pred.shape[0], 396 | (map_pred.shape[1] - start_y * 100. 
/ args.map_resolution + gx1) 397 | * 480 / map_pred.shape[1], 398 | np.deg2rad(-start_o) 399 | ) 400 | 401 | agent_arrow = vu.get_contour_points(pos, origin=(670, 50)) 402 | color = (int(color_palette[11] * 255), 403 | int(color_palette[10] * 255), 404 | int(color_palette[9] * 255)) 405 | cv2.drawContours(self.vis_image, [agent_arrow], 0, color, -1) 406 | 407 | if args.visualize: 408 | # Displaying the image 409 | cv2.imshow("Thread {}".format(self.rank), self.vis_image) 410 | cv2.waitKey(1) 411 | 412 | if args.print_images: 413 | fn = '{}/episodes/thread_{}/eps_{}/{}-{}-Vis-{}.png'.format( 414 | dump_dir, self.rank, self.episode_no, 415 | self.rank, self.episode_no, self.timestep) 416 | cv2.imwrite(fn, self.vis_image) 417 | -------------------------------------------------------------------------------- /agents/utils/semantic_prediction.py: -------------------------------------------------------------------------------- 1 | # The following code is largely borrowed from 2 | # https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py and 3 | # https://github.com/facebookresearch/detectron2/blob/master/demo/predictor.py 4 | 5 | import argparse 6 | import time 7 | 8 | import torch 9 | import numpy as np 10 | 11 | from detectron2.config import get_cfg 12 | from detectron2.utils.logger import setup_logger 13 | from detectron2.data.catalog import MetadataCatalog 14 | from detectron2.modeling import build_model 15 | from detectron2.checkpoint import DetectionCheckpointer 16 | from detectron2.utils.visualizer import ColorMode, Visualizer 17 | import detectron2.data.transforms as T 18 | 19 | from constants import coco_categories_mapping 20 | 21 | 22 | class SemanticPredMaskRCNN(): 23 | 24 | def __init__(self, args): 25 | self.segmentation_model = ImageSegmentation(args) 26 | self.args = args 27 | 28 | def get_prediction(self, img): 29 | args = self.args 30 | image_list = [] 31 | img = img[:, :, ::-1] 32 | image_list.append(img) 33 | seg_predictions, vis_output = self.segmentation_model.get_predictions( 34 | image_list, visualize=args.visualize == 2) 35 | 36 | if args.visualize == 2: 37 | img = vis_output.get_image() 38 | 39 | semantic_input = np.zeros((img.shape[0], img.shape[1], 15 + 1)) 40 | 41 | for j, class_idx in enumerate( 42 | seg_predictions[0]['instances'].pred_classes.cpu().numpy()): 43 | if class_idx in list(coco_categories_mapping.keys()): 44 | idx = coco_categories_mapping[class_idx] 45 | obj_mask = seg_predictions[0]['instances'].pred_masks[j] * 1. 46 | semantic_input[:, :, idx] += obj_mask.cpu().numpy() 47 | 48 | return semantic_input, img 49 | 50 | 51 | def compress_sem_map(sem_map): 52 | c_map = np.zeros((sem_map.shape[1], sem_map.shape[2])) 53 | for i in range(sem_map.shape[0]): 54 | c_map[sem_map[i] > 0.] 
= i + 1 55 | return c_map 56 | 57 | 58 | class ImageSegmentation(): 59 | def __init__(self, args): 60 | string_args = """ 61 | --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml 62 | --input input1.jpeg 63 | --confidence-threshold {} 64 | --opts MODEL.WEIGHTS 65 | detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl 66 | """.format(args.sem_pred_prob_thr) 67 | 68 | if args.sem_gpu_id == -2: 69 | string_args += """ MODEL.DEVICE cpu""" 70 | else: 71 | string_args += """ MODEL.DEVICE cuda:{}""".format(args.sem_gpu_id) 72 | 73 | string_args = string_args.split() 74 | 75 | args = get_seg_parser().parse_args(string_args) 76 | logger = setup_logger() 77 | logger.info("Arguments: " + str(args)) 78 | 79 | cfg = setup_cfg(args) 80 | self.demo = VisualizationDemo(cfg) 81 | 82 | def get_predictions(self, img, visualize=0): 83 | return self.demo.run_on_image(img, visualize=visualize) 84 | 85 | 86 | def setup_cfg(args): 87 | # load config from file and command-line arguments 88 | cfg = get_cfg() 89 | cfg.merge_from_file(args.config_file) 90 | cfg.merge_from_list(args.opts) 91 | # Set score_threshold for builtin models 92 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold 93 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold 94 | cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = \ 95 | args.confidence_threshold 96 | cfg.freeze() 97 | return cfg 98 | 99 | 100 | def get_seg_parser(): 101 | parser = argparse.ArgumentParser( 102 | description="Detectron2 demo for builtin models") 103 | parser.add_argument( 104 | "--config-file", 105 | default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", 106 | metavar="FILE", 107 | help="path to config file", 108 | ) 109 | parser.add_argument( 110 | "--webcam", 111 | action="store_true", 112 | help="Take inputs from webcam.") 113 | parser.add_argument("--video-input", help="Path to video file.") 114 | parser.add_argument( 115 | "--input", 116 | nargs="+", 117 | help="A list of space separated input images") 118 | parser.add_argument( 119 | "--output", 120 | help="A file or directory to save output visualizations. " 121 | "If not given, will show output in an OpenCV window.", 122 | ) 123 | 124 | parser.add_argument( 125 | "--confidence-threshold", 126 | type=float, 127 | default=0.5, 128 | help="Minimum score for instance predictions to be shown", 129 | ) 130 | parser.add_argument( 131 | "--opts", 132 | help="Modify config options using the command-line 'KEY VALUE' pairs", 133 | default=[], 134 | nargs=argparse.REMAINDER, 135 | ) 136 | return parser 137 | 138 | 139 | class VisualizationDemo(object): 140 | def __init__(self, cfg, instance_mode=ColorMode.IMAGE): 141 | """ 142 | Args: 143 | cfg (CfgNode): 144 | instance_mode (ColorMode): 145 | """ 146 | self.metadata = MetadataCatalog.get( 147 | cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" 148 | ) 149 | self.cpu_device = torch.device("cpu") 150 | self.instance_mode = instance_mode 151 | 152 | self.predictor = BatchPredictor(cfg) 153 | 154 | def run_on_image(self, image_list, visualize=0): 155 | """ 156 | Args: 157 | image (np.ndarray): an image of shape (H, W, C) (in BGR order). 158 | This is the format used by OpenCV. 159 | 160 | Returns: 161 | predictions (dict): the output of the model. 162 | vis_output (VisImage): the visualized image output. 
163 | """ 164 | vis_output = None 165 | all_predictions = self.predictor(image_list) 166 | # Convert image from OpenCV BGR format to Matplotlib RGB format. 167 | 168 | if visualize: 169 | predictions = all_predictions[0] 170 | image = image_list[0] 171 | visualizer = Visualizer( 172 | image, self.metadata, instance_mode=self.instance_mode) 173 | if "panoptic_seg" in predictions: 174 | panoptic_seg, segments_info = predictions["panoptic_seg"] 175 | vis_output = visualizer.draw_panoptic_seg_predictions( 176 | panoptic_seg.to(self.cpu_device), segments_info 177 | ) 178 | else: 179 | if "sem_seg" in predictions: 180 | vis_output = visualizer.draw_sem_seg( 181 | predictions["sem_seg"].argmax( 182 | dim=0).to(self.cpu_device) 183 | ) 184 | if "instances" in predictions: 185 | instances = predictions["instances"].to(self.cpu_device) 186 | vis_output = visualizer.draw_instance_predictions( 187 | predictions=instances) 188 | 189 | return all_predictions, vis_output 190 | 191 | 192 | class BatchPredictor: 193 | """ 194 | Create a simple end-to-end predictor with the given config that runs on 195 | single device for a list of input images. 196 | 197 | Compared to using the model directly, this class does the following 198 | additions: 199 | 200 | 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. 201 | 2. Always take BGR image as the input and apply conversion defined by 202 | `cfg.INPUT.FORMAT`. 203 | 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. 204 | 4. Take a list of input images 205 | 206 | Attributes: 207 | metadata (Metadata): the metadata of the underlying dataset, obtained 208 | from cfg.DATASETS.TEST. 209 | 210 | """ 211 | 212 | def __init__(self, cfg): 213 | self.cfg = cfg.clone() # cfg can be modified by model 214 | self.model = build_model(self.cfg) 215 | self.model.eval() 216 | self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) 217 | 218 | checkpointer = DetectionCheckpointer(self.model) 219 | checkpointer.load(cfg.MODEL.WEIGHTS) 220 | 221 | self.input_format = cfg.INPUT.FORMAT 222 | assert self.input_format in ["RGB", "BGR"], self.input_format 223 | 224 | def __call__(self, image_list): 225 | """ 226 | Args: 227 | image_list (list of np.ndarray): a list of images of 228 | shape (H, W, C) (in BGR order). 229 | 230 | Returns: 231 | predictions (dict): 232 | the output of the model for all images. 233 | See :doc:`/tutorials/models` for details about the format. 234 | """ 235 | inputs = [] 236 | for original_image in image_list: 237 | # https://github.com/sphinx-doc/sphinx/issues/4258 238 | # Apply pre-processing to image. 
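            # (input images are expected in BGR, as produced by OpenCV; note
            # that unlike detectron2's DefaultPredictor, no test-time resizing
            # is applied here, so frames keep their native resolution)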
239 | if self.input_format == "RGB": 240 | # whether the model expects BGR inputs or RGB 241 | original_image = original_image[:, :, ::-1] 242 | height, width = original_image.shape[:2] 243 | image = original_image 244 | image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) 245 | 246 | instance = {"image": image, "height": height, "width": width} 247 | 248 | inputs.append(instance) 249 | 250 | with torch.no_grad(): 251 | predictions = self.model(inputs) 252 | return predictions 253 | -------------------------------------------------------------------------------- /agents/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def get_contour_points(pos, origin, size=20): 6 | x, y, o = pos 7 | pt1 = (int(x) + origin[0], 8 | int(y) + origin[1]) 9 | pt2 = (int(x + size / 1.5 * np.cos(o + np.pi * 4 / 3)) + origin[0], 10 | int(y + size / 1.5 * np.sin(o + np.pi * 4 / 3)) + origin[1]) 11 | pt3 = (int(x + size * np.cos(o)) + origin[0], 12 | int(y + size * np.sin(o)) + origin[1]) 13 | pt4 = (int(x + size / 1.5 * np.cos(o - np.pi * 4 / 3)) + origin[0], 14 | int(y + size / 1.5 * np.sin(o - np.pi * 4 / 3)) + origin[1]) 15 | 16 | return np.array([pt1, pt2, pt3, pt4]) 17 | 18 | 19 | def draw_line(start, end, mat, steps=25, w=1): 20 | for i in range(steps + 1): 21 | x = int(np.rint(start[0] + (end[0] - start[0]) * i / steps)) 22 | y = int(np.rint(start[1] + (end[1] - start[1]) * i / steps)) 23 | mat[x - w:x + w, y - w:y + w] = 1 24 | return mat 25 | 26 | 27 | def init_vis_image(goal_name, legend): 28 | vis_image = np.ones((655, 1165, 3)).astype(np.uint8) * 255 29 | font = cv2.FONT_HERSHEY_SIMPLEX 30 | fontScale = 1 31 | color = (20, 20, 20) # BGR 32 | thickness = 2 33 | 34 | text = "Observations (Goal: {})".format(goal_name) 35 | textsize = cv2.getTextSize(text, font, fontScale, thickness)[0] 36 | textX = (640 - textsize[0]) // 2 + 15 37 | textY = (50 + textsize[1]) // 2 38 | vis_image = cv2.putText(vis_image, text, (textX, textY), 39 | font, fontScale, color, thickness, 40 | cv2.LINE_AA) 41 | 42 | text = "Predicted Semantic Map" 43 | textsize = cv2.getTextSize(text, font, fontScale, thickness)[0] 44 | textX = 640 + (480 - textsize[0]) // 2 + 30 45 | textY = (50 + textsize[1]) // 2 46 | vis_image = cv2.putText(vis_image, text, (textX, textY), 47 | font, fontScale, color, thickness, 48 | cv2.LINE_AA) 49 | 50 | # draw outlines 51 | color = [100, 100, 100] 52 | vis_image[49, 15:655] = color 53 | vis_image[49, 670:1150] = color 54 | vis_image[50:530, 14] = color 55 | vis_image[50:530, 655] = color 56 | vis_image[50:530, 669] = color 57 | vis_image[50:530, 1150] = color 58 | vis_image[530, 15:655] = color 59 | vis_image[530, 670:1150] = color 60 | 61 | # draw legend 62 | lx, ly, _ = legend.shape 63 | vis_image[537:537 + lx, 155:155 + ly, :] = legend 64 | 65 | return vis_image 66 | -------------------------------------------------------------------------------- /algo/__init__.py: -------------------------------------------------------------------------------- 1 | from .ppo import PPO 2 | -------------------------------------------------------------------------------- /algo/ppo.py: -------------------------------------------------------------------------------- 1 | # The following code is largely borrowed from: 2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/algo/ppo.py 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | 8 | 9 | 
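# The update() method below implements the PPO clipped surrogate objective:
#     ratio r_t = exp(log pi(a_t | s_t) - log pi_old(a_t | s_t))
#     policy loss = -E_t[min(r_t * A_t,
#                            clip(r_t, 1 - clip_param, 1 + clip_param) * A_t)]
# Advantages A_t are normalized once per update, the value loss is optionally
# clipped around the old value predictions, and an entropy bonus weighted by
# entropy_coef encourages exploration.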
class PPO(): 10 | 11 | def __init__( 12 | self, 13 | actor_critic, 14 | clip_param, 15 | ppo_epoch, 16 | num_mini_batch, 17 | value_loss_coef, 18 | entropy_coef, 19 | lr=None, 20 | eps=None, 21 | max_grad_norm=None, 22 | use_clipped_value_loss=True): 23 | 24 | self.actor_critic = actor_critic 25 | 26 | self.clip_param = clip_param 27 | self.ppo_epoch = ppo_epoch 28 | self.num_mini_batch = num_mini_batch 29 | 30 | self.value_loss_coef = value_loss_coef 31 | self.entropy_coef = entropy_coef 32 | 33 | self.max_grad_norm = max_grad_norm 34 | self.use_clipped_value_loss = use_clipped_value_loss 35 | 36 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, 37 | actor_critic.parameters()), 38 | lr=lr, eps=eps) 39 | 40 | def update(self, rollouts): 41 | advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1] 42 | advantages = (advantages - advantages.mean()) / ( 43 | advantages.std() + 1e-5) 44 | 45 | value_loss_epoch = 0 46 | action_loss_epoch = 0 47 | dist_entropy_epoch = 0 48 | 49 | for _ in range(self.ppo_epoch): 50 | 51 | if self.actor_critic.is_recurrent: 52 | data_generator = rollouts.recurrent_generator( 53 | advantages, self.num_mini_batch) 54 | else: 55 | data_generator = rollouts.feed_forward_generator( 56 | advantages, self.num_mini_batch) 57 | 58 | for sample in data_generator: 59 | 60 | value_preds = sample['value_preds'] 61 | returns = sample['returns'] 62 | adv_targ = sample['adv_targ'] 63 | 64 | # Reshape to do in a single forward pass for all steps 65 | values, action_log_probs, dist_entropy, _ = \ 66 | self.actor_critic.evaluate_actions( 67 | sample['obs'], sample['rec_states'], 68 | sample['masks'], sample['actions'], 69 | extras=sample['extras'] 70 | ) 71 | 72 | ratio = torch.exp(action_log_probs - 73 | sample['old_action_log_probs']) 74 | surr1 = ratio * adv_targ 75 | surr2 = torch.clamp(ratio, 1.0 - self.clip_param, 76 | 1.0 + self.clip_param) * adv_targ 77 | action_loss = -torch.min(surr1, surr2).mean() 78 | 79 | if self.use_clipped_value_loss: 80 | value_pred_clipped = value_preds + \ 81 | (values - value_preds).clamp( 82 | -self.clip_param, self.clip_param) 83 | value_losses = (values - returns).pow(2) 84 | value_losses_clipped = (value_pred_clipped 85 | - returns).pow(2) 86 | value_loss = .5 * torch.max(value_losses, 87 | value_losses_clipped).mean() 88 | else: 89 | value_loss = 0.5 * (returns - values).pow(2).mean() 90 | 91 | self.optimizer.zero_grad() 92 | (value_loss * self.value_loss_coef + action_loss - 93 | dist_entropy * self.entropy_coef).backward() 94 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), 95 | self.max_grad_norm) 96 | self.optimizer.step() 97 | 98 | value_loss_epoch += value_loss.item() 99 | action_loss_epoch += action_loss.item() 100 | dist_entropy_epoch += dist_entropy.item() 101 | 102 | num_updates = self.ppo_epoch * self.num_mini_batch 103 | 104 | value_loss_epoch /= num_updates 105 | action_loss_epoch /= num_updates 106 | dist_entropy_epoch /= num_updates 107 | 108 | return value_loss_epoch, action_loss_epoch, dist_entropy_epoch 109 | -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | 5 | def get_args(): 6 | parser = argparse.ArgumentParser( 7 | description='Goal-Oriented-Semantic-Exploration') 8 | 9 | # General Arguments 10 | parser.add_argument('--seed', type=int, default=1, 11 | help='random seed (default: 1)') 12 | 
parser.add_argument('--auto_gpu_config', type=int, default=1) 13 | parser.add_argument('--total_num_scenes', type=str, default="auto") 14 | parser.add_argument('-n', '--num_processes', type=int, default=5, 15 | help="""how many training processes to use (default:5) 16 | Overridden when auto_gpu_config=1 17 | and training on gpus""") 18 | parser.add_argument('--num_processes_per_gpu', type=int, default=6) 19 | parser.add_argument('--num_processes_on_first_gpu', type=int, default=1) 20 | parser.add_argument('--eval', type=int, default=0, 21 | help='0: Train, 1: Evaluate (default: 0)') 22 | parser.add_argument('--num_training_frames', type=int, default=10000000, 23 | help='total number of training frames') 24 | parser.add_argument('--num_eval_episodes', type=int, default=200, 25 | help="number of test episodes per scene") 26 | parser.add_argument('--num_train_episodes', type=int, default=10000, 27 | help="""number of train episodes per scene 28 | before loading the next scene""") 29 | parser.add_argument('--no_cuda', action='store_true', default=False, 30 | help='disables CUDA training') 31 | parser.add_argument("--sim_gpu_id", type=int, default=0, 32 | help="gpu id on which scenes are loaded") 33 | parser.add_argument("--sem_gpu_id", type=int, default=-1, 34 | help="""gpu id for semantic model, 35 | -1: same as sim gpu, -2: cpu""") 36 | 37 | # Logging, loading models, visualization 38 | parser.add_argument('--log_interval', type=int, default=10, 39 | help="""log interval, one log per n updates 40 | (default: 10) """) 41 | parser.add_argument('--save_interval', type=int, default=1, 42 | help="""save interval""") 43 | parser.add_argument('-d', '--dump_location', type=str, default="./tmp/", 44 | help='path to dump models and log (default: ./tmp/)') 45 | parser.add_argument('--exp_name', type=str, default="exp1", 46 | help='experiment name (default: exp1)') 47 | parser.add_argument('--save_periodic', type=int, default=500000, 48 | help='Model save frequency in number of updates') 49 | parser.add_argument('--load', type=str, default="0", 50 | help="""model path to load, 51 | 0 to not reload (default: 0)""") 52 | parser.add_argument('-v', '--visualize', type=int, default=0, 53 | help="""1: Render the observation and 54 | the predicted semantic map, 55 | 2: Render the observation with semantic 56 | predictions and the predicted semantic map 57 | (default: 0)""") 58 | parser.add_argument('--print_images', type=int, default=0, 59 | help='1: save visualization as images') 60 | 61 | # Environment, dataset and episode specifications 62 | parser.add_argument('-efw', '--env_frame_width', type=int, default=640, 63 | help='Frame width (default:640)') 64 | parser.add_argument('-efh', '--env_frame_height', type=int, default=480, 65 | help='Frame height (default:480)') 66 | parser.add_argument('-fw', '--frame_width', type=int, default=160, 67 | help='Frame width (default:160)') 68 | parser.add_argument('-fh', '--frame_height', type=int, default=120, 69 | help='Frame height (default:120)') 70 | parser.add_argument('-el', '--max_episode_length', type=int, default=500, 71 | help="""Maximum episode length""") 72 | parser.add_argument("--task_config", type=str, 73 | default="tasks/objectnav_gibson.yaml", 74 | help="path to config yaml containing task information") 75 | parser.add_argument("--split", type=str, default="train", 76 | help="dataset split (train | val | val_mini) ") 77 | parser.add_argument('--camera_height', type=float, default=0.88, 78 | help="agent camera height in metres") 79 | 
parser.add_argument('--hfov', type=float, default=79.0,
80 |                         help="horizontal field of view in degrees")
81 |     parser.add_argument('--turn_angle', type=float, default=30,
82 |                         help="Agent turn angle in degrees")
83 |     parser.add_argument('--min_depth', type=float, default=0.5,
84 |                         help="Minimum depth for depth sensor in meters")
85 |     parser.add_argument('--max_depth', type=float, default=5.0,
86 |                         help="Maximum depth for depth sensor in meters")
87 |     parser.add_argument('--success_dist', type=float, default=1.0,
88 |                         help="success distance threshold in meters")
89 |     parser.add_argument('--floor_thr', type=int, default=50,
90 |                         help="floor threshold in cm")
91 |     parser.add_argument('--min_d', type=float, default=1.5,
92 |                         help="min distance to goal during training in meters")
93 |     parser.add_argument('--max_d', type=float, default=100.0,
94 |                         help="max distance to goal during training in meters")
95 |     parser.add_argument('--version', type=str, default="v1.1",
96 |                         help="dataset version")
97 |
98 |     # Model Hyperparameters
99 |     parser.add_argument('--agent', type=str, default="sem_exp")
100 |     parser.add_argument('--lr', type=float, default=2.5e-5,
101 |                         help='learning rate (default: 2.5e-5)')
102 |     parser.add_argument('--global_hidden_size', type=int, default=256,
103 |                         help='global_hidden_size')
104 |     parser.add_argument('--eps', type=float, default=1e-5,
105 |                         help='RL Optimizer epsilon (default: 1e-5)')
106 |     parser.add_argument('--alpha', type=float, default=0.99,
107 |                         help='RL Optimizer alpha (default: 0.99)')
108 |     parser.add_argument('--gamma', type=float, default=0.99,
109 |                         help='discount factor for rewards (default: 0.99)')
110 |     parser.add_argument('--use_gae', action='store_true', default=False,
111 |                         help='use generalized advantage estimation')
112 |     parser.add_argument('--tau', type=float, default=0.95,
113 |                         help='gae parameter (default: 0.95)')
114 |     parser.add_argument('--entropy_coef', type=float, default=0.001,
115 |                         help='entropy term coefficient (default: 0.001)')
116 |     parser.add_argument('--value_loss_coef', type=float, default=0.5,
117 |                         help='value loss coefficient (default: 0.5)')
118 |     parser.add_argument('--max_grad_norm', type=float, default=0.5,
119 |                         help='max norm of gradients (default: 0.5)')
120 |     parser.add_argument('--num_global_steps', type=int, default=20,
121 |                         help='number of global policy rollout steps (default: 20)')
122 |     parser.add_argument('--ppo_epoch', type=int, default=4,
123 |                         help='number of ppo epochs (default: 4)')
124 |     parser.add_argument('--num_mini_batch', type=str, default="auto",
125 |                         help='number of mini batches for ppo (default: auto)')
126 |     parser.add_argument('--clip_param', type=float, default=0.2,
127 |                         help='ppo clip parameter (default: 0.2)')
128 |     parser.add_argument('--use_recurrent_global', type=int, default=0,
129 |                         help='use a recurrent global policy')
130 |     parser.add_argument('--num_local_steps', type=int, default=25,
131 |                         help="""Number of local policy steps
132 |                                 between each global step""")
133 |     parser.add_argument('--reward_coeff', type=float, default=0.1,
134 |                         help="Object goal reward coefficient")
135 |     parser.add_argument('--intrinsic_rew_coeff', type=float, default=0.02,
136 |                         help="intrinsic exploration reward coefficient")
137 |     parser.add_argument('--num_sem_categories', type=float, default=16)
138 |     parser.add_argument('--sem_pred_prob_thr', type=float, default=0.9,
139 |                         help="Semantic prediction confidence threshold")
140 |
141 |     # Mapping
142 |     parser.add_argument('--global_downscaling', type=int, default=2)
143
| parser.add_argument('--vision_range', type=int, default=100) 144 | parser.add_argument('--map_resolution', type=int, default=5) 145 | parser.add_argument('--du_scale', type=int, default=1) 146 | parser.add_argument('--map_size_cm', type=int, default=2400) 147 | parser.add_argument('--cat_pred_threshold', type=float, default=5.0) 148 | parser.add_argument('--map_pred_threshold', type=float, default=1.0) 149 | parser.add_argument('--exp_pred_threshold', type=float, default=1.0) 150 | parser.add_argument('--collision_threshold', type=float, default=0.20) 151 | 152 | # parse arguments 153 | args = parser.parse_args() 154 | 155 | args.cuda = not args.no_cuda and torch.cuda.is_available() 156 | 157 | if args.cuda: 158 | if args.auto_gpu_config: 159 | num_gpus = torch.cuda.device_count() 160 | if args.total_num_scenes != "auto": 161 | args.total_num_scenes = int(args.total_num_scenes) 162 | elif "objectnav_gibson" in args.task_config and \ 163 | "train" in args.split: 164 | args.total_num_scenes = 25 165 | elif "objectnav_gibson" in args.task_config and \ 166 | "val" in args.split: 167 | args.total_num_scenes = 5 168 | else: 169 | assert False, "Unknown task config, please specify" + \ 170 | " total_num_scenes" 171 | 172 | # GPU Memory required for the SemExp model: 173 | # 0.8 + 0.4 * args.total_num_scenes (GB) 174 | # GPU Memory required per thread: 2.6 (GB) 175 | min_memory_required = max(0.8 + 0.4 * args.total_num_scenes, 2.6) 176 | # Automatically configure number of training threads based on 177 | # number of GPUs available and GPU memory size 178 | gpu_memory = 1000 179 | for i in range(num_gpus): 180 | gpu_memory = min(gpu_memory, 181 | torch.cuda.get_device_properties( 182 | i).total_memory 183 | / 1024 / 1024 / 1024) 184 | assert gpu_memory > min_memory_required, \ 185 | """Insufficient GPU memory for GPU {}, gpu memory ({}GB) 186 | needs to be greater than {}GB""".format( 187 | i, gpu_memory, min_memory_required) 188 | 189 | num_processes_per_gpu = int(gpu_memory / 2.6) 190 | num_processes_on_first_gpu = \ 191 | int((gpu_memory - min_memory_required) / 2.6) 192 | 193 | if args.eval: 194 | max_threads = num_processes_per_gpu * (num_gpus - 1) \ 195 | + num_processes_on_first_gpu 196 | assert max_threads >= args.total_num_scenes, \ 197 | """Insufficient GPU memory for evaluation""" 198 | 199 | if num_gpus == 1: 200 | args.num_processes_on_first_gpu = num_processes_on_first_gpu 201 | args.num_processes_per_gpu = 0 202 | args.num_processes = num_processes_on_first_gpu 203 | assert args.num_processes > 0, "Insufficient GPU memory" 204 | else: 205 | num_threads = num_processes_per_gpu * (num_gpus - 1) \ 206 | + num_processes_on_first_gpu 207 | num_threads = min(num_threads, args.total_num_scenes) 208 | args.num_processes_per_gpu = num_processes_per_gpu 209 | args.num_processes_on_first_gpu = max( 210 | 0, 211 | num_threads - args.num_processes_per_gpu * (num_gpus - 1)) 212 | args.num_processes = num_threads 213 | 214 | args.sim_gpu_id = 1 215 | 216 | print("Auto GPU config:") 217 | print("Number of processes: {}".format(args.num_processes)) 218 | print("Number of processes on GPU 0: {}".format( 219 | args.num_processes_on_first_gpu)) 220 | print("Number of processes per GPU: {}".format( 221 | args.num_processes_per_gpu)) 222 | else: 223 | args.sem_gpu_id = -2 224 | 225 | if args.num_mini_batch == "auto": 226 | args.num_mini_batch = max(args.num_processes // 2, 1) 227 | else: 228 | args.num_mini_batch = int(args.num_mini_batch) 229 | 230 | return args 231 | 
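# Worked example of the auto GPU config above (illustrative numbers, assuming
# two GPUs whose reported memory is exactly 12 GB, objectnav_gibson train
# split):
#     total_num_scenes = 25
#     min_memory_required = max(0.8 + 0.4 * 25, 2.6) = 10.8 GB
#     num_processes_per_gpu = int(12 / 2.6) = 4
#     num_processes_on_first_gpu = int((12 - 10.8) / 2.6) = 0
#     num_threads = min(4 * (2 - 1) + 0, 25) = 4  -> args.num_processes = 4
#     sim_gpu_id is set to 1, so GPU 0 holds the SemExp model and GPU 1 runs
#     the simulator threads.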
-------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /constants.py: -------------------------------------------------------------------------------- 1 | scenes = {} 2 | scenes["train"] = [ 3 | 'Allensville', 4 | 'Beechwood', 5 | 'Benevolence', 6 | 'Coffeen', 7 | 'Cosmos', 8 | 'Forkland', 9 | 'Hanson', 10 | 'Hiteman', 11 | 'Klickitat', 12 | 'Lakeville', 13 | 'Leonardo', 14 | 'Lindenwood', 15 | 'Marstons', 16 | 'Merom', 17 | 'Mifflinburg', 18 | 'Newfields', 19 | 'Onaga', 20 | 'Pinesdale', 21 | 'Pomaria', 22 | 'Ranchester', 23 | 'Shelbyville', 24 | 'Stockman', 25 | 'Tolstoy', 26 | 'Wainscott', 27 | 'Woodbine', 28 | ] 29 | 30 | scenes["val"] = [ 31 | 'Collierville', 32 | 'Corozal', 33 | 'Darden', 34 | 'Markleeville', 35 | 'Wiconisco', 36 | ] 37 | 38 | coco_categories = { 39 | "chair": 0, 40 | "couch": 1, 41 | "potted plant": 2, 42 | "bed": 3, 43 | "toilet": 4, 44 | "tv": 5, 45 | "dining-table": 6, 46 | "oven": 7, 47 | "sink": 8, 48 | "refrigerator": 9, 49 | "book": 10, 50 | "clock": 11, 51 | "vase": 12, 52 | "cup": 13, 53 | "bottle": 14 54 | } 55 | 56 | coco_categories_mapping = { 57 | 56: 0, # chair 58 | 57: 1, # couch 59 | 58: 2, # potted plant 60 | 59: 3, # bed 61 | 61: 4, # toilet 62 | 62: 5, # tv 63 | 60: 6, # dining-table 64 | 69: 7, # oven 65 | 71: 8, # sink 66 | 72: 9, # refrigerator 67 | 73: 10, # book 68 | 74: 11, # clock 69 | 75: 12, # vase 70 | 41: 13, # cup 71 | 39: 14, # bottle 72 | } 73 | 74 | color_palette = [ 75 | 1.0, 1.0, 1.0, 76 | 0.6, 0.6, 0.6, 
77 | 0.95, 0.95, 0.95, 78 | 0.96, 0.36, 0.26, 79 | 0.12156862745098039, 0.47058823529411764, 0.7058823529411765, 80 | 0.9400000000000001, 0.7818, 0.66, 81 | 0.9400000000000001, 0.8868, 0.66, 82 | 0.8882000000000001, 0.9400000000000001, 0.66, 83 | 0.7832000000000001, 0.9400000000000001, 0.66, 84 | 0.6782000000000001, 0.9400000000000001, 0.66, 85 | 0.66, 0.9400000000000001, 0.7468000000000001, 86 | 0.66, 0.9400000000000001, 0.8518000000000001, 87 | 0.66, 0.9232, 0.9400000000000001, 88 | 0.66, 0.8182, 0.9400000000000001, 89 | 0.66, 0.7132, 0.9400000000000001, 90 | 0.7117999999999999, 0.66, 0.9400000000000001, 91 | 0.8168, 0.66, 0.9400000000000001, 92 | 0.9218, 0.66, 0.9400000000000001, 93 | 0.9400000000000001, 0.66, 0.8531999999999998, 94 | 0.9400000000000001, 0.66, 0.748199999999999] 95 | -------------------------------------------------------------------------------- /docs/DOCKER_INSTRUCTIONS.md: -------------------------------------------------------------------------------- 1 | # Docker and Singularity Instructions: 2 | We provide experimental [Docker](https://www.docker.com/) and [Singularity](https://sylabs.io/) images with all the dependencies installed. 3 | 4 | Before setting up Docker, pull the code using: 5 | ``` 6 | git clone https://github.com/devendrachaplot/Object-Goal-Navigation/ 7 | cd Object-Goal-Navigation/; 8 | ``` 9 | Download and set up the scene and episode datasets as described [here](README.md#setup). 10 | 11 | For Docker, either build the image using the provided [Dockerfile](./Dockerfile): 12 | ``` 13 | docker build -t devendrachaplot/habitat:sem_exp . 14 | ``` 15 | Or pull the image from Docker Hub: 16 | ``` 17 | docker pull devendrachaplot/habitat:sem_exp 18 | ``` 19 | 20 | After building or pulling the image, start a container using: 21 | ``` 22 | docker run -v $(pwd)/:/code -v $(pwd)/data:/code/data --runtime=nvidia -it devendrachaplot/habitat:sem_exp 23 | ``` 24 | 25 | Inside the container, check Habitat's compatibility with your system: 26 | ``` 27 | cd /habitat-api/ 28 | python examples/benchmark.py 29 | ``` 30 | 31 | To run the SemExp model inside the container, `cd /code/` and run the same commands as described in [INSTRUCTIONS](./INSTRUCTIONS.md). 32 | 33 | To pull the Singularity image: 34 | ``` 35 | singularity pull docker://devendrachaplot/habitat:sem_exp 36 | ``` -------------------------------------------------------------------------------- /docs/INSTRUCTIONS.md: -------------------------------------------------------------------------------- 1 | # Instructions 2 | 3 | ## Training 4 | To train the SemExp model on the Object Goal Navigation task: 5 | ``` 6 | python main.py 7 | ``` 8 | 9 | ### Specifying number of threads 10 | The code runs multiple parallel threads for training. Each thread loads a scene on a GPU. The code automatically decides the total number of threads and the number of threads on each GPU based on the available GPUs. 11 | 12 | If you would prefer not to use the auto GPU config, you need to specify the following: 13 | ``` 14 | --auto_gpu_config 0 15 | -n, --num_processes NUM_PROCESSES 16 | --num_processes_per_gpu NUM_PROCESSES_PER_GPU 17 | --num_processes_on_first_gpu NUM_PROCESSES_ON_FIRST_GPU 18 | ``` 19 | `NUM_PROCESSES_PER_GPU` depends on your GPU memory; 6 works well for 16GB GPUs. 20 | `NUM_PROCESSES_ON_FIRST_GPU` specifies the number of processes on the first GPU in addition to the SemExp model; 1 works well for 16GB GPUs.
21 | `NUM_PROCESSES` depends on the number of GPUs used for training and `NUM_PROCESSES_PER_GPU`, such that 22 | ``` 23 | NUM_PROCESSES <= min(NUM_PROCESSES_PER_GPU * number of GPUs + NUM_PROCESSES_ON_FIRST_GPU, 25) 24 | ``` 25 | The Gibson training set consists of 25 scenes. 26 | 27 | For example, to train the model on 5 GPUs with 16GB memory per GPU: 28 | ``` 29 | python main.py --auto_gpu_config 0 -n 25 --num_processes_per_gpu 6 --num_processes_on_first_gpu 1 --sim_gpu_id 1 30 | ``` 31 | Here, `sim_gpu_id = 1` specifies that simulator threads run on GPUs 1 onwards. 32 | GPUs 1 to 4 will run 6 threads each, and GPU 0 will run 1 thread plus 33 | the SemExp model. 34 | 35 | ### Specifying log location, periodic model dumps 36 | ``` 37 | python main.py -d saved/ --exp_name exp1 --save_periodic 500000 38 | ``` 39 | The above will save the best model files and the training log at `saved/models/exp1/` and periodically save all models every 500000 steps at `saved/dump/exp1/`. Each module will be saved in a separate file. 40 | 41 | ### Hyper-parameters 42 | Most of the default hyper-parameters should work fine. Some hyper-parameters are set for training with 25 threads and might need to be tuned when using fewer threads. Fewer threads lead to a smaller batch size, so the learning rate might need to be tuned using `--lr`. 43 | 44 | ## Downloading pre-trained models 45 | ``` 46 | mkdir pretrained_models; 47 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=171ZA7XNu5vi3XLpuKs8DuGGZrYyuSjL0' -O pretrained_models/sem_exp.pth 48 | ``` 49 | 50 | ## Evaluation 51 | 52 | The following are instructions for evaluating the model on the Gibson val set. 53 | 54 | To evaluate the pre-trained model: 55 | ``` 56 | python main.py --split val --eval 1 --load pretrained_models/sem_exp.pth 57 | ``` 58 | 59 | The pre-trained model should get 0.657 Success, 0.339 SPL, and 1.474 DTG. 60 | 61 | ### Manual GPU config 62 | 63 | If you would prefer not to use the auto GPU config, specify the number of threads for evaluation using `--num_processes` and the number of evaluation episodes per thread using `--num_eval_episodes`. 64 | The Gibson val set consists of 5 scenes with 200 episodes per scene, so we need 5 threads for evaluation and 200 episodes per thread. Split the 5 scenes across GPUs based on your GPU memory sizes. The code requires `0.8 + 0.4 * num_scenes` GB of GPU memory on the first GPU for the model and around 2.6GB of memory per scene; a worked check of these numbers is sketched below. 65 | 66 | For example, if you have 1 GPU with 16GB memory: 67 | ``` 68 | python main.py --split val --eval 1 --auto_gpu_config 0 \ 69 | -n 5 --num_eval_episodes 200 --num_processes_on_first_gpu 5 \ 70 | --load pretrained_models/sem_exp.pth 71 | ``` 72 | or if you have 2 GPUs with 12GB memory each: 73 | ``` 74 | python main.py --split val --eval 1 --auto_gpu_config 0 \ 75 | -n 5 --num_eval_episodes 200 --num_processes_on_first_gpu 1 \ 76 | --num_processes_per_gpu 4 --sim_gpu_id 1 \ 77 | --load pretrained_models/sem_exp.pth 78 | ``` 79 | 80 | ### Visualization and printing images 81 | To visualize the agent observations and the predicted map and pose, add `-v 1` as an argument to the above command. This requires a display to be attached to the system. 82 | 83 | To visualize on headless systems (without a display), use `--print_images 1 -d results/ --exp_name exp1`. This will save the visualization images in `results/dump/exp1/episodes/`.
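As a quick check of the memory arithmetic in the manual GPU config section above, a minimal sketch (the helper below is hypothetical, not part of the repository):

```python
# GPU 0 holds the model (0.8 + 0.4 * num_scenes GB) plus 2.6 GB per scene
# assigned to it; every other GPU only needs 2.6 GB per scene it runs.
def first_gpu_memory_gb(num_scenes, scenes_on_first_gpu):
    return 0.8 + 0.4 * num_scenes + 2.6 * scenes_on_first_gpu


print(first_gpu_memory_gb(5, 5))  # 15.8 GB -> all 5 scenes fit one 16 GB GPU
print(first_gpu_memory_gb(5, 1))  # 5.4 GB on GPU 0; the remaining 4 scenes
                                  # need 4 * 2.6 = 10.4 GB on a 12 GB GPU 1
```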
84 | 85 | Both `-v 1` and `--print_images 1` can be used together to visualize and print images at the same time. 86 | 87 | 88 | ## Notes 89 | 90 | - Training the model for 10 million frames with 25 threads takes around 2.5 days on an Nvidia DGX-1 system using 5 16GB GPUs, but the model provides good performance even with only 1 million frames (~6 hrs) of training. 91 | 92 | - Evaluating the model on the val set for 1000 episodes with 5 threads takes around 2.5 hrs on an Nvidia DGX-1 system. 93 | 94 | - The code does not contain the Denoising Network described in our [paper](https://arxiv.org/pdf/2007.00643.pdf). 95 | This is for the following reasons: 96 | - Training the Denoising Network requires downloading the original Gibson dataset (in non-Habitat format) and the 3DSceneGraph dataset, and building Habitat-format semantic scenes using both datasets. 97 | - Training the Denoising Network requires building and cleaning top-down maps, which makes training much slower. 98 | - The first-person semantic annotations for Gibson are not perfectly accurate; they do not align with the depth sensor. As a result, the Denoising Network provides only a marginal performance improvement. 99 | 100 | 101 | ## Tips 102 | To silence the Habitat sim log, add the following to your `~/.bashrc` (Linux) or `~/.bash_profile` (Mac): 103 | ``` 104 | export GLOG_minloglevel=2 105 | export MAGNUM_LOG="quiet" 106 | ``` -------------------------------------------------------------------------------- /docs/example.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/example.gif -------------------------------------------------------------------------------- /docs/legend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/legend.png -------------------------------------------------------------------------------- /docs/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/overview.jpg -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .habitat import construct_envs 4 | 5 | 6 | def make_vec_envs(args): 7 | envs = construct_envs(args) 8 | envs = VecPyTorch(envs, args.device) 9 | return envs 10 | 11 | 12 | # Adapted from 13 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/envs.py#L159 14 | class VecPyTorch(): 15 | 16 | def __init__(self, venv, device): 17 | self.venv = venv 18 | self.num_envs = venv.num_envs 19 | self.observation_space = venv.observation_space 20 | self.action_space = venv.action_space 21 | self.device = device 22 | 23 | def reset(self): 24 | obs, info = self.venv.reset() 25 | obs = torch.from_numpy(obs).float().to(self.device) 26 | return obs, info 27 | 28 | def step_async(self, actions): 29 | actions = actions.cpu().numpy() 30 | self.venv.step_async(actions) 31 | 32 | def step_wait(self): 33 | obs, reward, done, info = self.venv.step_wait() 34 | obs = torch.from_numpy(obs).float().to(self.device) 35 | reward = torch.from_numpy(reward).float() 36 | return
obs, reward, done, info 37 | 38 | def step(self, actions): 39 | actions = actions.cpu().numpy() 40 | obs, reward, done, info = self.venv.step(actions) 41 | obs = torch.from_numpy(obs).float().to(self.device) 42 | reward = torch.from_numpy(reward).float() 43 | return obs, reward, done, info 44 | 45 | def get_rewards(self, inputs): 46 | reward = self.venv.get_rewards(inputs) 47 | reward = torch.from_numpy(reward).float() 48 | return reward 49 | 50 | def plan_act_and_preprocess(self, inputs): 51 | obs, reward, done, info = self.venv.plan_act_and_preprocess(inputs) 52 | obs = torch.from_numpy(obs).float().to(self.device) 53 | reward = torch.from_numpy(reward).float() 54 | return obs, reward, done, info 55 | 56 | def close(self): 57 | return self.venv.close() 58 | -------------------------------------------------------------------------------- /envs/habitat/__init__.py: -------------------------------------------------------------------------------- 1 | # Parts of the code in this file have been borrowed from: 2 | # https://github.com/facebookresearch/habitat-api 3 | import os 4 | import numpy as np 5 | import torch 6 | from habitat.config.default import get_config as cfg_env 7 | from habitat.datasets.pointnav.pointnav_dataset import PointNavDatasetV1 8 | from habitat import Config, Env, RLEnv, VectorEnv, make_dataset 9 | 10 | from agents.sem_exp import Sem_Exp_Env_Agent 11 | from .objectgoal_env import ObjectGoal_Env 12 | 13 | from .utils.vector_env import VectorEnv 14 | 15 | 16 | def make_env_fn(args, config_env, rank): 17 | dataset = make_dataset(config_env.DATASET.TYPE, config=config_env.DATASET) 18 | config_env.defrost() 19 | config_env.SIMULATOR.SCENE = dataset.episodes[0].scene_id 20 | config_env.freeze() 21 | 22 | if args.agent == "sem_exp": 23 | env = Sem_Exp_Env_Agent(args=args, rank=rank, 24 | config_env=config_env, 25 | dataset=dataset 26 | ) 27 | else: 28 | env = ObjectGoal_Env(args=args, rank=rank, 29 | config_env=config_env, 30 | dataset=dataset 31 | ) 32 | 33 | env.seed(rank) 34 | return env 35 | 36 | 37 | def _get_scenes_from_folder(content_dir): 38 | scene_dataset_ext = ".glb.json.gz" 39 | scenes = [] 40 | for filename in os.listdir(content_dir): 41 | if filename.endswith(scene_dataset_ext): 42 | scene = filename[: -len(scene_dataset_ext) + 4] 43 | scenes.append(scene) 44 | scenes.sort() 45 | return scenes 46 | 47 | 48 | def construct_envs(args): 49 | env_configs = [] 50 | args_list = [] 51 | 52 | basic_config = cfg_env(config_paths=["envs/habitat/configs/" 53 | + args.task_config]) 54 | basic_config.defrost() 55 | basic_config.DATASET.SPLIT = args.split 56 | basic_config.DATASET.DATA_PATH = \ 57 | basic_config.DATASET.DATA_PATH.replace("v1", args.version) 58 | basic_config.DATASET.EPISODES_DIR = \ 59 | basic_config.DATASET.EPISODES_DIR.replace("v1", args.version) 60 | basic_config.freeze() 61 | 62 | scenes = basic_config.DATASET.CONTENT_SCENES 63 | if "*" in basic_config.DATASET.CONTENT_SCENES: 64 | content_dir = os.path.join(basic_config.DATASET.EPISODES_DIR.format( 65 | split=args.split), "content") 66 | scenes = _get_scenes_from_folder(content_dir) 67 | 68 | if len(scenes) > 0: 69 | assert len(scenes) >= args.num_processes, ( 70 | "reduce the number of processes as there " 71 | "aren't enough scenes" 72 | ) 73 | 74 | scene_split_sizes = [int(np.floor(len(scenes) / args.num_processes)) 75 | for _ in range(args.num_processes)] 76 | for i in range(len(scenes) % args.num_processes): 77 | scene_split_sizes[i] += 1 78 | 79 | print("Scenes per thread:") 80 | for i in
range(args.num_processes): 81 | config_env = cfg_env(config_paths=["envs/habitat/configs/" 82 | + args.task_config]) 83 | config_env.defrost() 84 | 85 | if len(scenes) > 0: 86 | config_env.DATASET.CONTENT_SCENES = scenes[ 87 | sum(scene_split_sizes[:i]): 88 | sum(scene_split_sizes[:i + 1]) 89 | ] 90 | print("Thread {}: {}".format(i, config_env.DATASET.CONTENT_SCENES)) 91 | 92 | if i < args.num_processes_on_first_gpu: 93 | gpu_id = 0 94 | else: 95 | gpu_id = int((i - args.num_processes_on_first_gpu) 96 | // args.num_processes_per_gpu) + args.sim_gpu_id 97 | gpu_id = min(torch.cuda.device_count() - 1, gpu_id) 98 | config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_id 99 | 100 | agent_sensors = [] 101 | agent_sensors.append("RGB_SENSOR") 102 | agent_sensors.append("DEPTH_SENSOR") 103 | # agent_sensors.append("SEMANTIC_SENSOR") 104 | 105 | config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors 106 | 107 | # Resetting episodes manually, setting a high max episode length in sim 108 | config_env.ENVIRONMENT.MAX_EPISODE_STEPS = 10000000 109 | config_env.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False 110 | 111 | config_env.SIMULATOR.RGB_SENSOR.WIDTH = args.env_frame_width 112 | config_env.SIMULATOR.RGB_SENSOR.HEIGHT = args.env_frame_height 113 | config_env.SIMULATOR.RGB_SENSOR.HFOV = args.hfov 114 | config_env.SIMULATOR.RGB_SENSOR.POSITION = [0, args.camera_height, 0] 115 | 116 | config_env.SIMULATOR.DEPTH_SENSOR.WIDTH = args.env_frame_width 117 | config_env.SIMULATOR.DEPTH_SENSOR.HEIGHT = args.env_frame_height 118 | config_env.SIMULATOR.DEPTH_SENSOR.HFOV = args.hfov 119 | config_env.SIMULATOR.DEPTH_SENSOR.MIN_DEPTH = args.min_depth 120 | config_env.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = args.max_depth 121 | config_env.SIMULATOR.DEPTH_SENSOR.POSITION = [0, args.camera_height, 0] 122 | 123 | # config_env.SIMULATOR.SEMANTIC_SENSOR.WIDTH = args.env_frame_width 124 | # config_env.SIMULATOR.SEMANTIC_SENSOR.HEIGHT = args.env_frame_height 125 | # config_env.SIMULATOR.SEMANTIC_SENSOR.HFOV = args.hfov 126 | # config_env.SIMULATOR.SEMANTIC_SENSOR.POSITION = \ 127 | # [0, args.camera_height, 0] 128 | 129 | config_env.SIMULATOR.TURN_ANGLE = args.turn_angle 130 | config_env.DATASET.SPLIT = args.split 131 | config_env.DATASET.DATA_PATH = \ 132 | config_env.DATASET.DATA_PATH.replace("v1", args.version) 133 | config_env.DATASET.EPISODES_DIR = \ 134 | config_env.DATASET.EPISODES_DIR.replace("v1", args.version) 135 | 136 | config_env.freeze() 137 | env_configs.append(config_env) 138 | 139 | args_list.append(args) 140 | 141 | envs = VectorEnv( 142 | make_env_fn=make_env_fn, 143 | env_fn_args=tuple( 144 | tuple( 145 | zip(args_list, env_configs, range(args.num_processes)) 146 | ) 147 | ), 148 | ) 149 | 150 | return envs 151 | -------------------------------------------------------------------------------- /envs/habitat/configs/tasks/objectnav_gibson.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | TURN_ANGLE: 30 5 | TILT_ANGLE: 30 6 | ACTION_SPACE_CONFIG: "v1" 7 | AGENT_0: 8 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'SEMANTIC_SENSOR'] 9 | HEIGHT: 0.88 10 | RADIUS: 0.18 11 | HABITAT_SIM_V0: 12 | GPU_DEVICE_ID: 0 13 | ALLOW_SLIDING: True 14 | SEMANTIC_SENSOR: 15 | WIDTH: 640 16 | HEIGHT: 480 17 | HFOV: 79 18 | POSITION: [0, 0.88, 0] 19 | RGB_SENSOR: 20 | WIDTH: 640 21 | HEIGHT: 480 22 | HFOV: 79 23 | POSITION: [0, 0.88, 0] 24 | DEPTH_SENSOR: 25 | WIDTH: 640 26 | HEIGHT: 480 27 | HFOV: 79 28 | MIN_DEPTH: 0.5 29 | MAX_DEPTH: 5.0
30 | POSITION: [0, 0.88, 0] 31 | TASK: 32 | TYPE: ObjectNav-v1 33 | POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"] 34 | SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR'] 35 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL'] 36 | SUCCESS: 37 | SUCCESS_DISTANCE: 0.2 38 | 39 | DATASET: 40 | TYPE: PointNav-v1 41 | SPLIT: train 42 | DATA_PATH: "data/datasets/objectnav/gibson/v1/{split}/{split}.json.gz" 43 | EPISODES_DIR: "data/datasets/objectnav/gibson/v1/{split}/" 44 | SCENES_DIR: "data/scene_datasets/" 45 | -------------------------------------------------------------------------------- /envs/habitat/objectgoal_env.py: -------------------------------------------------------------------------------- 1 | import json 2 | import bz2 3 | import gzip 4 | import _pickle as cPickle 5 | import gym 6 | import numpy as np 7 | import quaternion 8 | import skimage.morphology 9 | import habitat 10 | 11 | from envs.utils.fmm_planner import FMMPlanner 12 | from constants import coco_categories 13 | import envs.utils.pose as pu 14 | 15 | 16 | class ObjectGoal_Env(habitat.RLEnv): 17 | """The Object Goal Navigation environment class. The class is responsible 18 | for loading the dataset, generating episodes, and computing evaluation 19 | metrics. 20 | """ 21 | 22 | def __init__(self, args, rank, config_env, dataset): 23 | self.args = args 24 | self.rank = rank 25 | 26 | super().__init__(config_env, dataset) 27 | 28 | # Loading dataset info file 29 | self.split = config_env.DATASET.SPLIT 30 | self.episodes_dir = config_env.DATASET.EPISODES_DIR.format( 31 | split=self.split) 32 | 33 | dataset_info_file = self.episodes_dir + \ 34 | "{split}_info.pbz2".format(split=self.split) 35 | with bz2.BZ2File(dataset_info_file, 'rb') as f: 36 | self.dataset_info = cPickle.load(f) 37 | 38 | # Specifying action and observation space 39 | self.action_space = gym.spaces.Discrete(3) 40 | 41 | self.observation_space = gym.spaces.Box(0, 255, 42 | (3, args.frame_height, 43 | args.frame_width), 44 | dtype='uint8') 45 | 46 | # Initializations 47 | self.episode_no = 0 48 | 49 | # Scene info 50 | self.last_scene_path = None 51 | self.scene_path = None 52 | self.scene_name = None 53 | 54 | # Episode Dataset info 55 | self.eps_data = None 56 | self.eps_data_idx = None 57 | self.gt_planner = None 58 | self.object_boundary = None 59 | self.goal_idx = None 60 | self.goal_name = None 61 | self.map_obj_origin = None 62 | self.starting_loc = None 63 | self.starting_distance = None 64 | 65 | # Episode tracking info 66 | self.curr_distance = None 67 | self.prev_distance = None 68 | self.timestep = None 69 | self.stopped = None 70 | self.path_length = None 71 | self.last_sim_location = None 72 | self.trajectory_states = [] 73 | self.info = {} 74 | self.info['distance_to_goal'] = None 75 | self.info['spl'] = None 76 | self.info['success'] = None 77 | 78 | def load_new_episode(self): 79 | """The function loads a fixed episode from the episode dataset. This 80 | function is used for evaluating a trained model on the val split. 
81 | """ 82 | 83 | args = self.args 84 | self.scene_path = self.habitat_env.sim.config.SCENE 85 | scene_name = self.scene_path.split("/")[-1].split(".")[0] 86 | 87 | if self.scene_path != self.last_scene_path: 88 | episodes_file = self.episodes_dir + \ 89 | "content/{}_episodes.json.gz".format(scene_name) 90 | 91 | print("Loading episodes from: {}".format(episodes_file)) 92 | with gzip.open(episodes_file, 'r') as f: 93 | self.eps_data = json.loads( 94 | f.read().decode('utf-8'))["episodes"] 95 | 96 | self.eps_data_idx = 0 97 | self.last_scene_path = self.scene_path 98 | 99 | # Load episode info 100 | episode = self.eps_data[self.eps_data_idx] 101 | self.eps_data_idx += 1 102 | self.eps_data_idx = self.eps_data_idx % len(self.eps_data) 103 | pos = episode["start_position"] 104 | rot = quaternion.from_float_array(episode["start_rotation"]) 105 | 106 | goal_name = episode["object_category"] 107 | goal_idx = episode["object_id"] 108 | floor_idx = episode["floor_id"] 109 | 110 | # Load scene info 111 | scene_info = self.dataset_info[scene_name] 112 | sem_map = scene_info[floor_idx]['sem_map'] 113 | map_obj_origin = scene_info[floor_idx]['origin'] 114 | 115 | # Setup ground truth planner 116 | object_boundary = args.success_dist 117 | map_resolution = args.map_resolution 118 | selem = skimage.morphology.disk(2) 119 | traversible = skimage.morphology.binary_dilation( 120 | sem_map[0], selem) != True 121 | traversible = 1 - traversible 122 | planner = FMMPlanner(traversible) 123 | selem = skimage.morphology.disk( 124 | int(object_boundary * 100. / map_resolution)) 125 | goal_map = skimage.morphology.binary_dilation( 126 | sem_map[goal_idx + 1], selem) != True 127 | goal_map = 1 - goal_map 128 | planner.set_multi_goal(goal_map) 129 | 130 | # Get starting loc in GT map coordinates 131 | x = -pos[2] 132 | y = -pos[0] 133 | min_x, min_y = map_obj_origin / 100.0 134 | map_loc = int((-y - min_y) * 20.), int((-x - min_x) * 20.) 135 | 136 | self.gt_planner = planner 137 | self.starting_loc = map_loc 138 | self.object_boundary = object_boundary 139 | self.goal_idx = goal_idx 140 | self.goal_name = goal_name 141 | self.map_obj_origin = map_obj_origin 142 | 143 | self.starting_distance = self.gt_planner.fmm_dist[self.starting_loc]\ 144 | / 20.0 + self.object_boundary 145 | self.prev_distance = self.starting_distance 146 | self._env.sim.set_agent_state(pos, rot) 147 | 148 | # The following two should match approximately 149 | # print(starting_loc) 150 | # print(self.sim_continuous_to_sim_map(self.get_sim_location())) 151 | 152 | obs = self._env.sim.get_observations_at(pos, rot) 153 | 154 | return obs 155 | 156 | def generate_new_episode(self): 157 | """The function generates a random valid episode. This function is used 158 | for training a model on the train split. 
159 | """ 160 | 161 | args = self.args 162 | 163 | self.scene_path = self.habitat_env.sim.config.SCENE 164 | scene_name = self.scene_path.split("/")[-1].split(".")[0] 165 | 166 | scene_info = self.dataset_info[scene_name] 167 | map_resolution = args.map_resolution 168 | 169 | floor_idx = np.random.randint(len(scene_info.keys())) 170 | floor_height = scene_info[floor_idx]['floor_height'] 171 | sem_map = scene_info[floor_idx]['sem_map'] 172 | map_obj_origin = scene_info[floor_idx]['origin'] 173 | 174 | cat_counts = sem_map.sum(2).sum(1) 175 | possible_cats = list(np.arange(6)) 176 | 177 | for i in range(6): 178 | if cat_counts[i + 1] == 0: 179 | possible_cats.remove(i) 180 | 181 | object_boundary = args.success_dist 182 | 183 | loc_found = False 184 | while not loc_found: 185 | if len(possible_cats) == 0: 186 | print("No valid objects for {}".format(floor_height)) 187 | eps = eps - 1 188 | continue 189 | 190 | goal_idx = np.random.choice(possible_cats) 191 | 192 | for key, value in coco_categories.items(): 193 | if value == goal_idx: 194 | goal_name = key 195 | 196 | selem = skimage.morphology.disk(2) 197 | traversible = skimage.morphology.binary_dilation( 198 | sem_map[0], selem) != True 199 | traversible = 1 - traversible 200 | 201 | planner = FMMPlanner(traversible) 202 | 203 | selem = skimage.morphology.disk( 204 | int(object_boundary * 100. / map_resolution)) 205 | goal_map = skimage.morphology.binary_dilation( 206 | sem_map[goal_idx + 1], selem) != True 207 | goal_map = 1 - goal_map 208 | 209 | planner.set_multi_goal(goal_map) 210 | 211 | m1 = sem_map[0] > 0 212 | m2 = planner.fmm_dist > (args.min_d - object_boundary) * 20.0 213 | m3 = planner.fmm_dist < (args.max_d - object_boundary) * 20.0 214 | 215 | possible_starting_locs = np.logical_and(m1, m2) 216 | possible_starting_locs = np.logical_and( 217 | possible_starting_locs, m3) * 1. 218 | if possible_starting_locs.sum() != 0: 219 | loc_found = True 220 | else: 221 | print("Invalid object: {} / {} / {}".format( 222 | scene_name, floor_height, goal_name)) 223 | possible_cats.remove(goal_idx) 224 | scene_info[floor_idx]["sem_map"][goal_idx + 1, :, :] = 0. 225 | self.dataset_info[scene_name][floor_idx][ 226 | "sem_map"][goal_idx + 1, :, :] = 0. 227 | 228 | loc_found = False 229 | while not loc_found: 230 | pos = self._env.sim.sample_navigable_point() 231 | x = -pos[2] 232 | y = -pos[0] 233 | min_x, min_y = map_obj_origin / 100.0 234 | map_loc = int((-y - min_y) * 20.), int((-x - min_x) * 20.) 
235 | if abs(pos[1] - floor_height) < args.floor_thr / 100.0 and \ 236 | possible_starting_locs[map_loc[0], map_loc[1]] == 1: 237 | loc_found = True 238 | 239 | agent_state = self._env.sim.get_agent_state(0) 240 | rotation = agent_state.rotation 241 | rvec = quaternion.as_rotation_vector(rotation) 242 | rvec[1] = np.random.rand() * 2 * np.pi 243 | rot = quaternion.from_rotation_vector(rvec) 244 | 245 | self.gt_planner = planner 246 | self.starting_loc = map_loc 247 | self.object_boundary = object_boundary 248 | self.goal_idx = goal_idx 249 | self.goal_name = goal_name 250 | self.map_obj_origin = map_obj_origin 251 | 252 | self.starting_distance = self.gt_planner.fmm_dist[self.starting_loc] \ 253 | / 20.0 + self.object_boundary 254 | self.prev_distance = self.starting_distance 255 | 256 | self._env.sim.set_agent_state(pos, rot) 257 | 258 | # The following two should match approximately 259 | # print(starting_loc) 260 | # print(self.sim_continuous_to_sim_map(self.get_sim_location())) 261 | 262 | obs = self._env.sim.get_observations_at(pos, rot) 263 | 264 | return obs 265 | 266 | def sim_map_to_sim_continuous(self, coords): 267 | """Converts ground-truth 2D Map coordinates to absolute Habitat 268 | simulator position and rotation. 269 | """ 270 | agent_state = self._env.sim.get_agent_state(0) 271 | y, x = coords 272 | min_x, min_y = self.map_obj_origin / 100.0 273 | 274 | cont_x = x / 20. + min_x 275 | cont_y = y / 20. + min_y 276 | agent_state.position[0] = cont_y 277 | agent_state.position[2] = cont_x 278 | 279 | rotation = agent_state.rotation 280 | rvec = quaternion.as_rotation_vector(rotation) 281 | 282 | if self.args.train_single_eps: 283 | rvec[1] = 0.0 284 | else: 285 | rvec[1] = np.random.rand() * 2 * np.pi 286 | rot = quaternion.from_rotation_vector(rvec) 287 | 288 | return agent_state.position, rot 289 | 290 | def sim_continuous_to_sim_map(self, sim_loc): 291 | """Converts absolute Habitat simulator pose to ground-truth 2D Map 292 | coordinates. 293 | """ 294 | x, y, o = sim_loc 295 | min_x, min_y = self.map_obj_origin / 100.0 296 | x, y = int((-x - min_x) * 20.), int((-y - min_y) * 20.) 297 | 298 | o = np.rad2deg(o) + 180.0 299 | return y, x, o 300 | 301 | def reset(self): 302 | """Resets the environment to a new episode. 303 | 304 | Returns: 305 | obs (ndarray): RGBD observations (4 x H x W) 306 | info (dict): contains timestep, pose, goal category and 307 | evaluation metric info 308 | """ 309 | args = self.args 310 | new_scene = self.episode_no % args.num_train_episodes == 0 311 | 312 | self.episode_no += 1 313 | 314 | # Initializations 315 | self.timestep = 0 316 | self.stopped = False 317 | self.path_length = 1e-5 318 | self.trajectory_states = [] 319 | 320 | if new_scene: 321 | obs = super().reset() 322 | self.scene_name = self.habitat_env.sim.config.SCENE 323 | print("Changing scene: {}/{}".format(self.rank, self.scene_name)) 324 | 325 | self.scene_path = self.habitat_env.sim.config.SCENE 326 | 327 | if self.split == "val": 328 | obs = self.load_new_episode() 329 | else: 330 | obs = self.generate_new_episode() 331 | 332 | rgb = obs['rgb'].astype(np.uint8) 333 | depth = obs['depth'] 334 | state = np.concatenate((rgb, depth), axis=2).transpose(2, 0, 1) 335 | self.last_sim_location = self.get_sim_location() 336 | 337 | # Set info 338 | self.info['time'] = self.timestep 339 | self.info['sensor_pose'] = [0., 0., 0.] 
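# sensor_pose holds the egocentric (dx, dy, do) pose change; it is zeroed at the start of an episode and updated with the actual pose change in step() via get_pose_change()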
340 | self.info['goal_cat_id'] = self.goal_idx 341 | self.info['goal_name'] = self.goal_name 342 | 343 | return state, self.info 344 | 345 | def step(self, action): 346 | """Function to take an action in the environment. 347 | 348 | Args: 349 | action (dict): 350 | dict with following keys: 351 | 'action' (int): 0: stop, 1: forward, 2: left, 3: right 352 | 353 | Returns: 354 | obs (ndarray): RGBD observations (4 x H x W) 355 | reward (float): amount of reward returned after previous action 356 | done (bool): whether the episode has ended 357 | info (dict): contains timestep, pose, goal category and 358 | evaluation metric info 359 | """ 360 | action = action["action"] 361 | if action == 0: 362 | self.stopped = True 363 | # Not sending stop to simulator, resetting manually 364 | action = 3 365 | 366 | obs, rew, done, _ = super().step(action) 367 | 368 | # Get pose change 369 | dx, dy, do = self.get_pose_change() 370 | self.info['sensor_pose'] = [dx, dy, do] 371 | self.path_length += pu.get_l2_distance(0, dx, 0, dy) 372 | 373 | spl, success, dist = 0., 0., 0. 374 | if done: 375 | spl, success, dist = self.get_metrics() 376 | self.info['distance_to_goal'] = dist 377 | self.info['spl'] = spl 378 | self.info['success'] = success 379 | 380 | rgb = obs['rgb'].astype(np.uint8) 381 | depth = obs['depth'] 382 | state = np.concatenate((rgb, depth), axis=2).transpose(2, 0, 1) 383 | 384 | self.timestep += 1 385 | self.info['time'] = self.timestep 386 | 387 | return state, rew, done, self.info 388 | 389 | def get_reward_range(self): 390 | """This function is not used, Habitat-RLEnv requires this function""" 391 | return (0., 1.0) 392 | 393 | def get_reward(self, observations): 394 | curr_loc = self.sim_continuous_to_sim_map(self.get_sim_location()) 395 | self.curr_distance = self.gt_planner.fmm_dist[curr_loc[0], 396 | curr_loc[1]] / 20.0 397 | 398 | reward = (self.prev_distance - self.curr_distance) * \ 399 | self.args.reward_coeff 400 | 401 | self.prev_distance = self.curr_distance 402 | return reward 403 | 404 | def get_metrics(self): 405 | """This function computes evaluation metrics for the Object Goal task 406 | 407 | Returns: 408 | spl (float): Success weighted by Path Length 409 | (See https://arxiv.org/pdf/1807.06757.pdf) 410 | success (int): 0: Failure, 1: Successful 411 | dist (float): Distance to Success (DTS), distance of the agent 412 | from the success threshold boundary in meters. 
413 | (See https://arxiv.org/pdf/2007.00643.pdf) 414 | """ 415 | curr_loc = self.sim_continuous_to_sim_map(self.get_sim_location()) 416 | dist = self.gt_planner.fmm_dist[curr_loc[0], curr_loc[1]] / 20.0 417 | if dist == 0.0: 418 | success = 1 419 | else: 420 | success = 0 421 | spl = min(success * self.starting_distance / self.path_length, 1) 422 | return spl, success, dist 423 | 424 | def get_done(self, observations): 425 | if self.info['time'] >= self.args.max_episode_length - 1: 426 | done = True 427 | elif self.stopped: 428 | done = True 429 | else: 430 | done = False 431 | return done 432 | 433 | def get_info(self, observations): 434 | """This function is not used, Habitat-RLEnv requires this function""" 435 | info = {} 436 | return info 437 | 438 | def get_spaces(self): 439 | """Returns observation and action spaces for the ObjectGoal task.""" 440 | return self.observation_space, self.action_space 441 | 442 | def get_sim_location(self): 443 | """Returns x, y, o pose of the agent in the Habitat simulator.""" 444 | 445 | agent_state = super().habitat_env.sim.get_agent_state(0) 446 | x = -agent_state.position[2] 447 | y = -agent_state.position[0] 448 | axis = quaternion.as_euler_angles(agent_state.rotation)[0] 449 | if (axis % (2 * np.pi)) < 0.1 or (axis % 450 | (2 * np.pi)) > 2 * np.pi - 0.1: 451 | o = quaternion.as_euler_angles(agent_state.rotation)[1] 452 | else: 453 | o = 2 * np.pi - quaternion.as_euler_angles(agent_state.rotation)[1] 454 | if o > np.pi: 455 | o -= 2 * np.pi 456 | return x, y, o 457 | 458 | def get_pose_change(self): 459 | """Returns dx, dy, do pose change of the agent relative to the last 460 | timestep.""" 461 | curr_sim_pose = self.get_sim_location() 462 | dx, dy, do = pu.get_rel_pose_change( 463 | curr_sim_pose, self.last_sim_location) 464 | self.last_sim_location = curr_sim_pose 465 | return dx, dy, do 466 | -------------------------------------------------------------------------------- /envs/habitat/utils/vector_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from multiprocessing.connection import Connection 8 | from multiprocessing.context import BaseContext 9 | from queue import Queue 10 | from threading import Thread 11 | from typing import ( 12 | Any, 13 | Callable, 14 | Dict, 15 | List, 16 | Optional, 17 | Sequence, 18 | Set, 19 | Tuple, 20 | Union, 21 | ) 22 | 23 | import gym 24 | import numpy as np 25 | from gym.spaces.dict_space import Dict as SpaceDict 26 | 27 | import habitat 28 | from habitat.config import Config 29 | from habitat.core.env import Env, Observations, RLEnv 30 | from habitat.core.logging import logger 31 | from habitat.core.utils import tile_images 32 | 33 | try: 34 | # Use torch.multiprocessing if we can. 
35 | # We have yet to find a reason to not use it and 36 | # you are required to use it when sending a torch.Tensor 37 | # between processes 38 | import torch.multiprocessing as mp 39 | except ImportError: 40 | import multiprocessing as mp 41 | 42 | STEP_COMMAND = "step" 43 | RESET_COMMAND = "reset" 44 | RENDER_COMMAND = "render" 45 | CLOSE_COMMAND = "close" 46 | OBSERVATION_SPACE_COMMAND = "observation_space" 47 | ACTION_SPACE_COMMAND = "action_space" 48 | CALL_COMMAND = "call" 49 | EPISODE_COMMAND = "current_episode" 50 | PLAN_ACT_AND_PREPROCESS = "plan_act_and_preprocess" 51 | COUNT_EPISODES_COMMAND = "count_episodes" 52 | EPISODE_OVER = "episode_over" 53 | GET_METRICS = "get_metrics" 54 | 55 | 56 | def _make_env_fn( 57 | config: Config, dataset: Optional[habitat.Dataset] = None, rank: int = 0 58 | ) -> Env: 59 | """Constructor for default habitat `env.Env`. 60 | 61 | :param config: configuration for environment. 62 | :param dataset: dataset for environment. 63 | :param rank: rank for setting seed of environment 64 | :return: `env.Env` / `env.RLEnv` object 65 | """ 66 | habitat_env = Env(config=config, dataset=dataset) 67 | habitat_env.seed(config.SEED + rank) 68 | return habitat_env 69 | 70 | 71 | class VectorEnv: 72 | r"""Vectorized environment which creates multiple processes where each 73 | process runs its own environment. Main class for parallelization of 74 | training and evaluation. 75 | 76 | 77 | All the environments are synchronized on step and reset methods. 78 | """ 79 | 80 | observation_spaces: List[SpaceDict] 81 | action_spaces: List[SpaceDict] 82 | _workers: List[Union[mp.Process, Thread]] 83 | _is_waiting: bool 84 | _num_envs: int 85 | _auto_reset_done: bool 86 | _mp_ctx: BaseContext 87 | _connection_read_fns: List[Callable[[], Any]] 88 | _connection_write_fns: List[Callable[[Any], None]] 89 | 90 | def __init__( 91 | self, 92 | make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn, 93 | env_fn_args: Sequence[Tuple] = None, 94 | auto_reset_done: bool = True, 95 | multiprocessing_start_method: str = "forkserver", 96 | ) -> None: 97 | """.. 98 | 99 | :param make_env_fn: function which creates a single environment. An 100 | environment can be of type `env.Env` or `env.RLEnv` 101 | :param env_fn_args: tuple of tuple of args to pass to the 102 | `_make_env_fn`. 103 | :param auto_reset_done: automatically reset the environment when 104 | done. This functionality is provided for seamless training 105 | of vectorized environments. 106 | :param multiprocessing_start_method: the multiprocessing method used to 107 | spawn worker processes. Valid methods are 108 | :py:`{'spawn', 'forkserver', 'fork'}`; :py:`'forkserver'` is the 109 | recommended method as it works well with CUDA. If :py:`'fork'` is 110 | used, the subproccess must be started before any other GPU useage. 111 | """ 112 | self._is_waiting = False 113 | self._is_closed = True 114 | 115 | assert ( 116 | env_fn_args is not None and len(env_fn_args) > 0 117 | ), "number of environments to be created should be greater than 0" 118 | 119 | self._num_envs = len(env_fn_args) 120 | 121 | assert multiprocessing_start_method in self._valid_start_methods, ( 122 | "multiprocessing_start_method must be one of {}. 
Got '{}'" 123 | ).format(self._valid_start_methods, multiprocessing_start_method) 124 | self._auto_reset_done = auto_reset_done 125 | self._mp_ctx = mp.get_context(multiprocessing_start_method) 126 | self._workers = [] 127 | ( 128 | self._connection_read_fns, 129 | self._connection_write_fns, 130 | ) = self._spawn_workers( # noqa 131 | env_fn_args, make_env_fn 132 | ) 133 | 134 | self._is_closed = False 135 | 136 | for write_fn in self._connection_write_fns: 137 | write_fn((OBSERVATION_SPACE_COMMAND, None)) 138 | self.observation_spaces = [ 139 | read_fn() for read_fn in self._connection_read_fns 140 | ] 141 | for write_fn in self._connection_write_fns: 142 | write_fn((ACTION_SPACE_COMMAND, None)) 143 | self.action_spaces = [ 144 | read_fn() for read_fn in self._connection_read_fns 145 | ] 146 | self.observation_space = self.observation_spaces[0] 147 | self.action_space = self.action_spaces[0] 148 | self._paused = [] 149 | 150 | @property 151 | def num_envs(self): 152 | r"""number of individual environments. 153 | """ 154 | return self._num_envs - len(self._paused) 155 | 156 | @staticmethod 157 | def _worker_env( 158 | connection_read_fn: Callable, 159 | connection_write_fn: Callable, 160 | env_fn: Callable, 161 | env_fn_args: Tuple[Any], 162 | auto_reset_done: bool, 163 | child_pipe: Optional[Connection] = None, 164 | parent_pipe: Optional[Connection] = None, 165 | ) -> None: 166 | r"""process worker for creating and interacting with the environment. 167 | """ 168 | env = env_fn(*env_fn_args) 169 | if parent_pipe is not None: 170 | parent_pipe.close() 171 | try: 172 | command, data = connection_read_fn() 173 | while command != CLOSE_COMMAND: 174 | if command == STEP_COMMAND: 175 | # different step methods for habitat.RLEnv and habitat.Env 176 | if isinstance(env, habitat.RLEnv) or isinstance( 177 | env, gym.Env 178 | ): 179 | # habitat.RLEnv 180 | observations, reward, done, info = env.step(**data) 181 | if auto_reset_done and done: 182 | observations, info = env.reset() 183 | connection_write_fn((observations, reward, done, info)) 184 | elif isinstance(env, habitat.Env): 185 | # habitat.Env 186 | observations = env.step(**data) 187 | if auto_reset_done and env.episode_over: 188 | observations = env.reset() 189 | connection_write_fn(observations) 190 | else: 191 | raise NotImplementedError 192 | 193 | elif command == RESET_COMMAND: 194 | observations = env.reset() 195 | connection_write_fn(observations) 196 | 197 | elif command == RENDER_COMMAND: 198 | connection_write_fn(env.render(*data[0], **data[1])) 199 | 200 | elif ( 201 | command == OBSERVATION_SPACE_COMMAND 202 | or command == ACTION_SPACE_COMMAND 203 | ): 204 | if isinstance(command, str): 205 | connection_write_fn(getattr(env, command)) 206 | 207 | elif command == CALL_COMMAND: 208 | function_name, function_args = data 209 | if function_args is None or len(function_args) == 0: 210 | result = getattr(env, function_name)() 211 | else: 212 | result = getattr(env, function_name)(**function_args) 213 | connection_write_fn(result) 214 | 215 | # TODO: update CALL_COMMAND for getting attribute like this 216 | elif command == EPISODE_COMMAND: 217 | connection_write_fn(env.current_episode) 218 | 219 | elif command == PLAN_ACT_AND_PREPROCESS: 220 | observations, reward, done, info = \ 221 | env.plan_act_and_preprocess(data) 222 | if auto_reset_done and done: 223 | observations, info = env.reset() 224 | connection_write_fn((observations, reward, done, info)) 225 | 226 | elif command == COUNT_EPISODES_COMMAND: 227 | 
connection_write_fn(len(env.episodes)) 228 | 229 | elif command == EPISODE_OVER: 230 | connection_write_fn(env.episode_over) 231 | 232 | elif command == GET_METRICS: 233 | result = env.get_metrics() 234 | connection_write_fn(result) 235 | 236 | else: 237 | raise NotImplementedError 238 | 239 | command, data = connection_read_fn() 240 | 241 | if child_pipe is not None: 242 | child_pipe.close() 243 | except KeyboardInterrupt: 244 | logger.info("Worker KeyboardInterrupt") 245 | finally: 246 | env.close() 247 | 248 | def _spawn_workers( 249 | self, 250 | env_fn_args: Sequence[Tuple], 251 | make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn, 252 | ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]: 253 | parent_connections, worker_connections = zip( 254 | *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_envs)] 255 | ) 256 | self._workers = [] 257 | for worker_conn, parent_conn, env_args in zip( 258 | worker_connections, parent_connections, env_fn_args 259 | ): 260 | ps = self._mp_ctx.Process( 261 | target=self._worker_env, 262 | args=( 263 | worker_conn.recv, 264 | worker_conn.send, 265 | make_env_fn, 266 | env_args, 267 | self._auto_reset_done, 268 | worker_conn, 269 | parent_conn, 270 | ), 271 | ) 272 | self._workers.append(ps) 273 | ps.daemon = True 274 | ps.start() 275 | worker_conn.close() 276 | return ( 277 | [p.recv for p in parent_connections], 278 | [p.send for p in parent_connections], 279 | ) 280 | 281 | def current_episodes(self): 282 | self._is_waiting = True 283 | for write_fn in self._connection_write_fns: 284 | write_fn((EPISODE_COMMAND, None)) 285 | results = [] 286 | for read_fn in self._connection_read_fns: 287 | results.append(read_fn()) 288 | self._is_waiting = False 289 | return results 290 | 291 | def count_episodes(self): 292 | self._is_waiting = True 293 | for write_fn in self._connection_write_fns: 294 | write_fn((COUNT_EPISODES_COMMAND, None)) 295 | results = [] 296 | for read_fn in self._connection_read_fns: 297 | results.append(read_fn()) 298 | self._is_waiting = False 299 | return results 300 | 301 | def episode_over(self): 302 | self._is_waiting = True 303 | for write_fn in self._connection_write_fns: 304 | write_fn((EPISODE_OVER, None)) 305 | results = [] 306 | for read_fn in self._connection_read_fns: 307 | results.append(read_fn()) 308 | self._is_waiting = False 309 | return results 310 | 311 | def get_metrics(self): 312 | self._is_waiting = True 313 | for write_fn in self._connection_write_fns: 314 | write_fn((GET_METRICS, None)) 315 | results = [] 316 | for read_fn in self._connection_read_fns: 317 | results.append(read_fn()) 318 | self._is_waiting = False 319 | return results 320 | 321 | def reset(self): 322 | r"""Reset all the vectorized environments 323 | 324 | :return: list of outputs from the reset method of envs. 325 | """ 326 | self._is_waiting = True 327 | for write_fn in self._connection_write_fns: 328 | write_fn((RESET_COMMAND, None)) 329 | results = [] 330 | for read_fn in self._connection_read_fns: 331 | results.append(read_fn()) 332 | obs, infos = zip(*results) 333 | 334 | self._is_waiting = False 335 | return np.stack(obs), infos 336 | 337 | def reset_at(self, index_env: int): 338 | r"""Reset in the index_env environment in the vector. 339 | 340 | :param index_env: index of the environment to be reset 341 | :return: list containing the output of reset method of indexed env. 
342 | """ 343 | self._is_waiting = True 344 | self._connection_write_fns[index_env]((RESET_COMMAND, None)) 345 | results = [self._connection_read_fns[index_env]()] 346 | self._is_waiting = False 347 | return results 348 | 349 | def step_at(self, index_env: int, action: Dict[str, Any]): 350 | r"""Step in the index_env environment in the vector. 351 | 352 | :param index_env: index of the environment to be stepped into 353 | :param action: action to be taken 354 | :return: list containing the output of step method of indexed env. 355 | """ 356 | self._is_waiting = True 357 | self._connection_write_fns[index_env]((STEP_COMMAND, action)) 358 | results = [self._connection_read_fns[index_env]()] 359 | self._is_waiting = False 360 | return results 361 | 362 | def step_async(self, data: List[Union[int, str, Dict[str, Any]]]) -> None: 363 | r"""Asynchronously step in the environments. 364 | 365 | :param data: list of size _num_envs containing keyword arguments to 366 | pass to `step` method for each Environment. For example, 367 | :py:`[{"action": "TURN_LEFT", "action_args": {...}}, ...]`. 368 | """ 369 | # Backward compatibility 370 | if isinstance(data[0], (int, np.integer, str)): 371 | data = [{"action": {"action": action}} for action in data] 372 | 373 | self._is_waiting = True 374 | for write_fn, args in zip(self._connection_write_fns, data): 375 | write_fn((STEP_COMMAND, args)) 376 | 377 | def step_wait(self) -> List[Observations]: 378 | r"""Wait until all the asynchronized environments have synchronized. 379 | """ 380 | results = [] 381 | for read_fn in self._connection_read_fns: 382 | results.append(read_fn()) 383 | self._is_waiting = False 384 | obs, rews, dones, infos = zip(*results) 385 | return np.stack(obs), np.stack(rews), np.stack(dones), infos 386 | 387 | def step(self, data: List[Union[int, str, Dict[str, Any]]]) -> List[Any]: 388 | r"""Perform actions in the vectorized environments. 389 | 390 | :param data: list of size _num_envs containing keyword arguments to 391 | pass to `step` method for each Environment. For example, 392 | :py:`[{"action": "TURN_LEFT", "action_args": {...}}, ...]`. 393 | :return: list of outputs from the step method of envs. 394 | """ 395 | self.step_async(data) 396 | return self.step_wait() 397 | 398 | def close(self) -> None: 399 | if self._is_closed: 400 | return 401 | 402 | if self._is_waiting: 403 | for read_fn in self._connection_read_fns: 404 | read_fn() 405 | 406 | for write_fn in self._connection_write_fns: 407 | write_fn((CLOSE_COMMAND, None)) 408 | 409 | for _, _, write_fn, _ in self._paused: 410 | write_fn((CLOSE_COMMAND, None)) 411 | 412 | for process in self._workers: 413 | process.join() 414 | 415 | for _, _, _, process in self._paused: 416 | process.join() 417 | 418 | self._is_closed = True 419 | 420 | def pause_at(self, index: int) -> None: 421 | r"""Pauses computation on this env without destroying the env. 422 | 423 | :param index: which env to pause. All indexes after this one will be 424 | shifted down by one. 425 | 426 | This is useful for not needing to call steps on all environments when 427 | only some are active (for example during the last episodes of running 428 | eval episodes). 
429 | """ 430 | if self._is_waiting: 431 | for read_fn in self._connection_read_fns: 432 | read_fn() 433 | read_fn = self._connection_read_fns.pop(index) 434 | write_fn = self._connection_write_fns.pop(index) 435 | worker = self._workers.pop(index) 436 | self._paused.append((index, read_fn, write_fn, worker)) 437 | 438 | def resume_all(self) -> None: 439 | r"""Resumes any paused envs. 440 | """ 441 | for index, read_fn, write_fn, worker in reversed(self._paused): 442 | self._connection_read_fns.insert(index, read_fn) 443 | self._connection_write_fns.insert(index, write_fn) 444 | self._workers.insert(index, worker) 445 | self._paused = [] 446 | 447 | def call_at( 448 | self, 449 | index: int, 450 | function_name: str, 451 | function_args: Optional[Dict[str, Any]] = None, 452 | ) -> Any: 453 | r"""Calls a function (which is passed by name) on the selected env and 454 | returns the result. 455 | 456 | :param index: which env to call the function on. 457 | :param function_name: the name of the function to call on the env. 458 | :param function_args: optional function args. 459 | :return: result of calling the function. 460 | """ 461 | self._is_waiting = True 462 | self._connection_write_fns[index]( 463 | (CALL_COMMAND, (function_name, function_args)) 464 | ) 465 | result = self._connection_read_fns[index]() 466 | self._is_waiting = False 467 | return result 468 | 469 | def call( 470 | self, 471 | function_names: List[str], 472 | function_args_list: Optional[List[Any]] = None, 473 | ) -> List[Any]: 474 | r"""Calls a list of functions (which are passed by name) on the 475 | corresponding env (by index). 476 | 477 | :param function_names: the name of the functions to call on the envs. 478 | :param function_args_list: list of function args for each function. If 479 | provided, :py:`len(function_args_list)` should be as long as 480 | :py:`len(function_names)`. 481 | :return: result of calling the function. 482 | """ 483 | self._is_waiting = True 484 | if function_args_list is None: 485 | function_args_list = [None] * len(function_names) 486 | assert len(function_names) == len(function_args_list) 487 | func_args = zip(function_names, function_args_list) 488 | for write_fn, func_args_on in zip( 489 | self._connection_write_fns, func_args 490 | ): 491 | write_fn((CALL_COMMAND, func_args_on)) 492 | results = [] 493 | for read_fn in self._connection_read_fns: 494 | results.append(read_fn()) 495 | self._is_waiting = False 496 | return results 497 | 498 | def render( 499 | self, mode: str = "human", *args, **kwargs 500 | ) -> Union[np.ndarray, None]: 501 | r"""Render observations from all environments in a tiled image. 
502 | """ 503 | for write_fn in self._connection_write_fns: 504 | write_fn((RENDER_COMMAND, (args, {"mode": "rgb", **kwargs}))) 505 | images = [read_fn() for read_fn in self._connection_read_fns] 506 | tile = tile_images(images) 507 | if mode == "human": 508 | from habitat.core.utils import try_cv2_import 509 | 510 | cv2 = try_cv2_import() 511 | 512 | cv2.imshow("vecenv", tile[:, :, ::-1]) 513 | cv2.waitKey(1) 514 | return None 515 | elif mode == "rgb_array": 516 | return tile 517 | else: 518 | raise NotImplementedError 519 | 520 | def plan_act_and_preprocess(self, inputs): 521 | self._assert_not_closed() 522 | self._is_waiting = True 523 | for e, write_fn in enumerate(self._connection_write_fns): 524 | write_fn((PLAN_ACT_AND_PREPROCESS, inputs[e])) 525 | results = [] 526 | for read_fn in self._connection_read_fns: 527 | results.append(read_fn()) 528 | obs, rews, dones, infos = zip(*results) 529 | self._is_waiting = False 530 | return np.stack(obs), np.stack(rews), np.stack(dones), infos 531 | 532 | def _assert_not_closed(self): 533 | assert not self._is_closed, "Trying to operate on a SubprocVecEnv after calling close()" 534 | 535 | @property 536 | def _valid_start_methods(self) -> Set[str]: 537 | return {"forkserver", "spawn", "fork"} 538 | 539 | def __del__(self): 540 | self.close() 541 | 542 | def __enter__(self): 543 | return self 544 | 545 | def __exit__(self, exc_type, exc_val, exc_tb): 546 | self.close() 547 | 548 | 549 | class ThreadedVectorEnv(VectorEnv): 550 | r"""Provides same functionality as `VectorEnv`, the only difference is it 551 | runs in a multi-thread setup inside a single process. 552 | 553 | `VectorEnv` runs in a multi-proc setup. This makes it much easier to debug 554 | when using `VectorEnv` because you can actually put break points in the 555 | environment methods. It should not be used for best performance. 556 | """ 557 | 558 | def _spawn_workers( 559 | self, 560 | env_fn_args: Sequence[Tuple], 561 | make_env_fn: Callable[..., Env] = _make_env_fn, 562 | ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]: 563 | parent_read_queues, parent_write_queues = zip( 564 | *[(Queue(), Queue()) for _ in range(self._num_envs)] 565 | ) 566 | self._workers = [] 567 | for parent_read_queue, parent_write_queue, env_args in zip( 568 | parent_read_queues, parent_write_queues, env_fn_args 569 | ): 570 | thread = Thread( 571 | target=self._worker_env, 572 | args=( 573 | parent_write_queue.get, 574 | parent_read_queue.put, 575 | make_env_fn, 576 | env_args, 577 | self._auto_reset_done, 578 | ), 579 | ) 580 | self._workers.append(thread) 581 | thread.daemon = True 582 | thread.start() 583 | return ( 584 | [q.get for q in parent_read_queues], 585 | [q.put for q in parent_write_queues], 586 | ) 587 | -------------------------------------------------------------------------------- /envs/utils/depth_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utilities for processing depth images. 17 | """ 18 | from argparse import Namespace 19 | 20 | import itertools 21 | import numpy as np 22 | import torch 23 | 24 | import envs.utils.rotation_utils as ru 25 | 26 | 27 | def get_camera_matrix(width, height, fov): 28 | """Returns a camera matrix from image size and fov.""" 29 | xc = (width - 1.) / 2. 30 | zc = (height - 1.) / 2. 31 | f = (width / 2.) / np.tan(np.deg2rad(fov / 2.)) 32 | camera_matrix = {'xc': xc, 'zc': zc, 'f': f} 33 | camera_matrix = Namespace(**camera_matrix) 34 | return camera_matrix 35 | 36 | 37 | def get_point_cloud_from_z(Y, camera_matrix, scale=1): 38 | """Projects the depth image Y into a 3D point cloud. 39 | Inputs: 40 | Y is ...xHxW 41 | camera_matrix 42 | Outputs: 43 | X is positive going right 44 | Y is positive into the image 45 | Z is positive up in the image 46 | XYZ is ...xHxWx3 47 | """ 48 | x, z = np.meshgrid(np.arange(Y.shape[-1]), 49 | np.arange(Y.shape[-2] - 1, -1, -1)) 50 | for _ in range(Y.ndim - 2): 51 | x = np.expand_dims(x, axis=0) 52 | z = np.expand_dims(z, axis=0) 53 | X = (x[::scale, ::scale] - camera_matrix.xc) * \ 54 | Y[::scale, ::scale] / camera_matrix.f 55 | Z = (z[::scale, ::scale] - camera_matrix.zc) * \ 56 | Y[::scale, ::scale] / camera_matrix.f 57 | XYZ = np.concatenate((X[..., np.newaxis], 58 | Y[::scale, ::scale][..., np.newaxis], 59 | Z[..., np.newaxis]), axis=X.ndim) 60 | return XYZ 61 | 62 | 63 | def transform_camera_view(XYZ, sensor_height, camera_elevation_degree): 64 | """ 65 | Transforms the point cloud into geocentric frame to account for 66 | camera elevation and angle 67 | Input: 68 | XYZ : ...x3 69 | sensor_height : height of the sensor 70 | camera_elevation_degree : camera elevation to rectify. 71 | Output: 72 | XYZ : ...x3 73 | """ 74 | R = ru.get_r_matrix( 75 | [1., 0., 0.], angle=np.deg2rad(camera_elevation_degree)) 76 | XYZ = np.matmul(XYZ.reshape(-1, 3), R.T).reshape(XYZ.shape) 77 | XYZ[..., 2] = XYZ[..., 2] + sensor_height 78 | return XYZ 79 | 80 | 81 | def transform_pose(XYZ, current_pose): 82 | """ 83 | Transforms the point cloud into geocentric frame to account for 84 | camera position 85 | Input: 86 | XYZ : ...x3 87 | current_pose : camera position (x, y, theta (radians)) 88 | Output: 89 | XYZ : ...x3 90 | """ 91 | R = ru.get_r_matrix([0., 0., 1.], angle=current_pose[2] - np.pi / 2.) 92 | XYZ = np.matmul(XYZ.reshape(-1, 3), R.T).reshape(XYZ.shape) 93 | XYZ[:, :, 0] = XYZ[:, :, 0] + current_pose[0] 94 | XYZ[:, :, 1] = XYZ[:, :, 1] + current_pose[1] 95 | return XYZ 96 | 97 | 98 | def bin_points(XYZ_cms, map_size, z_bins, xy_resolution): 99 | """Bins points into xy-z bins 100 | XYZ_cms is ... x H x W x3 101 | Outputs is ... 
x map_size x map_size x (len(z_bins)+1) 102 | """ 103 | sh = XYZ_cms.shape 104 | XYZ_cms = XYZ_cms.reshape([-1, sh[-3], sh[-2], sh[-1]]) 105 | n_z_bins = len(z_bins) + 1 106 | counts = [] 107 | for XYZ_cm in XYZ_cms: 108 | isnotnan = np.logical_not(np.isnan(XYZ_cm[:, :, 0])) 109 | X_bin = np.round(XYZ_cm[:, :, 0] / xy_resolution).astype(np.int32) 110 | Y_bin = np.round(XYZ_cm[:, :, 1] / xy_resolution).astype(np.int32) 111 | Z_bin = np.digitize(XYZ_cm[:, :, 2], bins=z_bins).astype(np.int32) 112 | 113 | isvalid = np.array([X_bin >= 0, X_bin < map_size, Y_bin >= 0, 114 | Y_bin < map_size, 115 | Z_bin >= 0, Z_bin < n_z_bins, isnotnan]) 116 | isvalid = np.all(isvalid, axis=0) 117 | 118 | ind = (Y_bin * map_size + X_bin) * n_z_bins + Z_bin 119 | ind[np.logical_not(isvalid)] = 0 120 | count = np.bincount(ind.ravel(), isvalid.ravel().astype(np.int32), 121 | minlength=map_size * map_size * n_z_bins) 122 | counts = np.reshape(count, [map_size, map_size, n_z_bins]) 123 | 124 | counts = counts.reshape(list(sh[:-3]) + [map_size, map_size, n_z_bins]) 125 | 126 | return counts 127 | 128 | 129 | def get_point_cloud_from_z_t(Y_t, camera_matrix, device, scale=1): 130 | """Projects the depth image Y into a 3D point cloud. 131 | Inputs: 132 | Y is ...xHxW 133 | camera_matrix 134 | Outputs: 135 | X is positive going right 136 | Y is positive into the image 137 | Z is positive up in the image 138 | XYZ is ...xHxWx3 139 | """ 140 | grid_x, grid_z = torch.meshgrid(torch.arange(Y_t.shape[-1]), 141 | torch.arange(Y_t.shape[-2] - 1, -1, -1)) 142 | grid_x = grid_x.transpose(1, 0).to(device) 143 | grid_z = grid_z.transpose(1, 0).to(device) 144 | grid_x = grid_x.unsqueeze(0).expand(Y_t.size()) 145 | grid_z = grid_z.unsqueeze(0).expand(Y_t.size()) 146 | 147 | X_t = (grid_x[:, ::scale, ::scale] - camera_matrix.xc) * \ 148 | Y_t[:, ::scale, ::scale] / camera_matrix.f 149 | Z_t = (grid_z[:, ::scale, ::scale] - camera_matrix.zc) * \ 150 | Y_t[:, ::scale, ::scale] / camera_matrix.f 151 | 152 | XYZ = torch.stack( 153 | (X_t, Y_t[:, ::scale, ::scale], Z_t), dim=len(Y_t.size())) 154 | 155 | return XYZ 156 | 157 | 158 | def transform_camera_view_t( 159 | XYZ, sensor_height, camera_elevation_degree, device): 160 | """ 161 | Transforms the point cloud into geocentric frame to account for 162 | camera elevation and angle 163 | Input: 164 | XYZ : ...x3 165 | sensor_height : height of the sensor 166 | camera_elevation_degree : camera elevation to rectify. 167 | Output: 168 | XYZ : ...x3 169 | """ 170 | R = ru.get_r_matrix( 171 | [1., 0., 0.], angle=np.deg2rad(camera_elevation_degree)) 172 | XYZ = torch.matmul(XYZ.reshape(-1, 3), 173 | torch.from_numpy(R).float().transpose(1, 0).to(device) 174 | ).reshape(XYZ.shape) 175 | XYZ[..., 2] = XYZ[..., 2] + sensor_height 176 | return XYZ 177 | 178 | 179 | def transform_pose_t(XYZ, current_pose, device): 180 | """ 181 | Transforms the point cloud into geocentric frame to account for 182 | camera position 183 | Input: 184 | XYZ : ...x3 185 | current_pose : camera position (x, y, theta (radians)) 186 | Output: 187 | XYZ : ...x3 188 | """ 189 | R = ru.get_r_matrix([0., 0., 1.], angle=current_pose[2] - np.pi / 2.) 190 | XYZ = torch.matmul(XYZ.reshape(-1, 3), 191 | torch.from_numpy(R).float().transpose(1, 0).to(device) 192 | ).reshape(XYZ.shape) 193 | XYZ[..., 0] += current_pose[0] 194 | XYZ[..., 1] += current_pose[1] 195 | return XYZ 196 | 197 | 198 | def splat_feat_nd(init_grid, feat, coords): 199 | """ 200 | Args: 201 | init_grid: B X nF X W X H X D X .. 
202 | feat: B X nF X nPt 203 | coords: B X nDims X nPt in [-1, 1] 204 | Returns: 205 | grid: B X nF X W X H X D X .. 206 | """ 207 | wts_dim = [] 208 | pos_dim = [] 209 | grid_dims = init_grid.shape[2:] 210 | 211 | B = init_grid.shape[0] 212 | F = init_grid.shape[1] 213 | 214 | n_dims = len(grid_dims) 215 | 216 | grid_flat = init_grid.view(B, F, -1) 217 | 218 | for d in range(n_dims): 219 | pos = coords[:, [d], :] * grid_dims[d] / 2 + grid_dims[d] / 2 220 | pos_d = [] 221 | wts_d = [] 222 | 223 | for ix in [0, 1]: 224 | pos_ix = torch.floor(pos) + ix 225 | safe_ix = (pos_ix > 0) & (pos_ix < grid_dims[d]) 226 | safe_ix = safe_ix.type(pos.dtype) 227 | 228 | wts_ix = 1 - torch.abs(pos - pos_ix) 229 | 230 | wts_ix = wts_ix * safe_ix 231 | pos_ix = pos_ix * safe_ix 232 | 233 | pos_d.append(pos_ix) 234 | wts_d.append(wts_ix) 235 | 236 | pos_dim.append(pos_d) 237 | wts_dim.append(wts_d) 238 | 239 | l_ix = [[0, 1] for d in range(n_dims)] 240 | 241 | for ix_d in itertools.product(*l_ix): 242 | wts = torch.ones_like(wts_dim[0][0]) 243 | index = torch.zeros_like(wts_dim[0][0]) 244 | for d in range(n_dims): 245 | index = index * grid_dims[d] + pos_dim[d][ix_d[d]] 246 | wts = wts * wts_dim[d][ix_d[d]] 247 | 248 | index = index.long() 249 | grid_flat.scatter_add_(2, index.expand(-1, F, -1), feat * wts) 250 | grid_flat = torch.round(grid_flat) 251 | 252 | return grid_flat.view(init_grid.shape) 253 | -------------------------------------------------------------------------------- /envs/utils/fmm_planner.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import skfmm 4 | import skimage 5 | from numpy import ma 6 | 7 | 8 | def get_mask(sx, sy, scale, step_size): 9 | size = int(step_size // scale) * 2 + 1 10 | mask = np.zeros((size, size)) 11 | for i in range(size): 12 | for j in range(size): 13 | if ((i + 0.5) - (size // 2 + sx)) ** 2 + \ 14 | ((j + 0.5) - (size // 2 + sy)) ** 2 <= \ 15 | step_size ** 2 \ 16 | and ((i + 0.5) - (size // 2 + sx)) ** 2 + \ 17 | ((j + 0.5) - (size // 2 + sy)) ** 2 > \ 18 | (step_size - 1) ** 2: 19 | mask[i, j] = 1 20 | 21 | mask[size // 2, size // 2] = 1 22 | return mask 23 | 24 | 25 | def get_dist(sx, sy, scale, step_size): 26 | size = int(step_size // scale) * 2 + 1 27 | mask = np.zeros((size, size)) + 1e-10 28 | for i in range(size): 29 | for j in range(size): 30 | if ((i + 0.5) - (size // 2 + sx)) ** 2 + \ 31 | ((j + 0.5) - (size // 2 + sy)) ** 2 <= \ 32 | step_size ** 2: 33 | mask[i, j] = max(5, 34 | (((i + 0.5) - (size // 2 + sx)) ** 2 + 35 | ((j + 0.5) - (size // 2 + sy)) ** 2) ** 0.5) 36 | return mask 37 | 38 | 39 | class FMMPlanner(): 40 | def __init__(self, traversible, scale=1, step_size=5): 41 | self.scale = scale 42 | self.step_size = step_size 43 | if scale != 1.: 44 | self.traversible = cv2.resize(traversible, 45 | (traversible.shape[1] // scale, 46 | traversible.shape[0] // scale), 47 | interpolation=cv2.INTER_NEAREST) 48 | self.traversible = np.rint(self.traversible) 49 | else: 50 | self.traversible = traversible 51 | 52 | self.du = int(self.step_size / (self.scale * 1.)) 53 | self.fmm_dist = None 54 | 55 | def set_goal(self, goal, auto_improve=False): 56 | traversible_ma = ma.masked_values(self.traversible * 1, 0) 57 | goal_x, goal_y = int(goal[0] / (self.scale * 1.)), \ 58 | int(goal[1] / (self.scale * 1.)) 59 | 60 | if self.traversible[goal_x, goal_y] == 0. 
and auto_improve: 61 | goal_x, goal_y = self._find_nearest_goal([goal_x, goal_y]) 62 | 63 | traversible_ma[goal_x, goal_y] = 0 64 | dd = skfmm.distance(traversible_ma, dx=1) 65 | dd = ma.filled(dd, np.max(dd) + 1) 66 | self.fmm_dist = dd 67 | return 68 | 69 | def set_multi_goal(self, goal_map): 70 | traversible_ma = ma.masked_values(self.traversible * 1, 0) 71 | traversible_ma[goal_map == 1] = 0 72 | dd = skfmm.distance(traversible_ma, dx=1) 73 | dd = ma.filled(dd, np.max(dd) + 1) 74 | self.fmm_dist = dd 75 | return 76 | 77 | def get_short_term_goal(self, state): 78 | scale = self.scale * 1. 79 | state = [x / scale for x in state] 80 | dx, dy = state[0] - int(state[0]), state[1] - int(state[1]) 81 | mask = get_mask(dx, dy, scale, self.step_size) 82 | dist_mask = get_dist(dx, dy, scale, self.step_size) 83 | 84 | state = [int(x) for x in state] 85 | 86 | dist = np.pad(self.fmm_dist, self.du, 87 | 'constant', constant_values=self.fmm_dist.shape[0] ** 2) 88 | subset = dist[state[0]:state[0] + 2 * self.du + 1, 89 | state[1]:state[1] + 2 * self.du + 1] 90 | 91 | assert subset.shape[0] == 2 * self.du + 1 and \ 92 | subset.shape[1] == 2 * self.du + 1, \ 93 | "Planning error: unexpected subset shape {}".format(subset.shape) 94 | 95 | subset *= mask 96 | subset += (1 - mask) * self.fmm_dist.shape[0] ** 2 97 | 98 | if subset[self.du, self.du] < 0.25 * 100 / 5.: # 25cm 99 | stop = True 100 | else: 101 | stop = False 102 | 103 | subset -= subset[self.du, self.du] 104 | ratio1 = subset / dist_mask 105 | subset[ratio1 < -1.5] = 1 106 | 107 | (stg_x, stg_y) = np.unravel_index(np.argmin(subset), subset.shape) 108 | 109 | if subset[stg_x, stg_y] > -0.0001: 110 | replan = True 111 | else: 112 | replan = False 113 | 114 | return (stg_x + state[0] - self.du) * scale, \ 115 | (stg_y + state[1] - self.du) * scale, replan, stop 116 | 117 | def _find_nearest_goal(self, goal): 118 | traversible = skimage.morphology.binary_dilation( 119 | np.zeros(self.traversible.shape), 120 | skimage.morphology.disk(2)) != True 121 | traversible = traversible * 1. 
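        # Note: dilating an all-zeros map leaves it all zeros, so `traversible`
        # here is an entirely free grid. Planning on it from the goal yields
        # near-Euclidean distances; masking by the real traversibility below
        # then snaps the goal to the nearest actually traversible cell.
        #
        # Illustrative end-to-end use of FMMPlanner (a sketch with
        # hypothetical map shape and poses, not code from this repo):
        #
        #     free = np.ones((480, 480))            # 1 = traversible
        #     planner = FMMPlanner(free, step_size=5)
        #     planner.set_goal([400, 400])
        #     stg_x, stg_y, replan, stop = \
        #         planner.get_short_term_goal([240., 240.])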
122 | planner = FMMPlanner(traversible) 123 | planner.set_goal(goal) 124 | 125 | mask = self.traversible 126 | 127 | dist_map = planner.fmm_dist * mask 128 | dist_map[dist_map == 0] = dist_map.max() 129 | 130 | goal = np.unravel_index(dist_map.argmin(), dist_map.shape) 131 | 132 | return goal 133 | -------------------------------------------------------------------------------- /envs/utils/map_builder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import envs.utils.depth_utils as du 3 | 4 | 5 | class MapBuilder(object): 6 | def __init__(self, params): 7 | self.params = params 8 | frame_width = params['frame_width'] 9 | frame_height = params['frame_height'] 10 | fov = params['fov'] 11 | self.camera_matrix = du.get_camera_matrix( 12 | frame_width, 13 | frame_height, 14 | fov) 15 | self.vision_range = params['vision_range'] 16 | 17 | self.map_size_cm = params['map_size_cm'] 18 | self.resolution = params['resolution'] 19 | agent_min_z = params['agent_min_z'] 20 | agent_max_z = params['agent_max_z'] 21 | self.z_bins = [agent_min_z, agent_max_z] 22 | self.du_scale = params['du_scale'] 23 | self.visualize = params['visualize'] 24 | self.obs_threshold = params['obs_threshold'] 25 | 26 | self.map = np.zeros((self.map_size_cm // self.resolution, 27 | self.map_size_cm // self.resolution, 28 | len(self.z_bins) + 1), dtype=np.float32) 29 | 30 | self.agent_height = params['agent_height'] 31 | self.agent_view_angle = params['agent_view_angle'] 32 | return 33 | 34 | def update_map(self, depth, current_pose): 35 | with np.errstate(invalid="ignore"): 36 | depth[depth > self.vision_range * self.resolution] = np.NaN 37 | point_cloud = du.get_point_cloud_from_z(depth, self.camera_matrix, 38 | scale=self.du_scale) 39 | 40 | agent_view = du.transform_camera_view(point_cloud, 41 | self.agent_height, 42 | self.agent_view_angle) 43 | 44 | shift_loc = [self.vision_range * self.resolution // 2, 0, np.pi / 2.0] 45 | agent_view_centered = du.transform_pose(agent_view, shift_loc) 46 | 47 | agent_view_flat = du.bin_points( 48 | agent_view_centered, 49 | self.vision_range, 50 | self.z_bins, 51 | self.resolution) 52 | 53 | agent_view_cropped = agent_view_flat[:, :, 1] 54 | agent_view_cropped = agent_view_cropped / self.obs_threshold 55 | agent_view_cropped[agent_view_cropped >= 0.5] = 1.0 56 | agent_view_cropped[agent_view_cropped < 0.5] = 0.0 57 | 58 | agent_view_explored = agent_view_flat.sum(2) 59 | agent_view_explored[agent_view_explored > 0] = 1.0 60 | 61 | geocentric_pc = du.transform_pose(agent_view, current_pose) 62 | 63 | geocentric_flat = du.bin_points( 64 | geocentric_pc, 65 | self.map.shape[0], 66 | self.z_bins, 67 | self.resolution) 68 | 69 | self.map = self.map + geocentric_flat 70 | 71 | map_gt = self.map[:, :, 1] / self.obs_threshold 72 | map_gt[map_gt >= 0.5] = 1.0 73 | map_gt[map_gt < 0.5] = 0.0 74 | 75 | explored_gt = self.map.sum(2) 76 | explored_gt[explored_gt > 1] = 1.0 77 | 78 | return agent_view_cropped, map_gt, agent_view_explored, explored_gt 79 | 80 | def get_st_pose(self, current_loc): 81 | loc = [- (current_loc[0] / self.resolution 82 | - self.map_size_cm // (self.resolution * 2)) / 83 | (self.map_size_cm // (self.resolution * 2)), 84 | - (current_loc[1] / self.resolution 85 | - self.map_size_cm // (self.resolution * 2)) / 86 | (self.map_size_cm // (self.resolution * 2)), 87 | 90 - np.rad2deg(current_loc[2])] 88 | return loc 89 | 90 | def reset_map(self, map_size): 91 | self.map_size_cm = map_size 92 | 93 | self.map = 
np.zeros((self.map_size_cm // self.resolution, 94 | self.map_size_cm // self.resolution, 95 | len(self.z_bins) + 1), dtype=np.float32) 96 | 97 | def get_map(self): 98 | return self.map 99 | -------------------------------------------------------------------------------- /envs/utils/pose.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_l2_distance(x1, x2, y1, y2): 5 | """ 6 | Computes the L2 distance between two points. 7 | """ 8 | return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 9 | 10 | 11 | def get_rel_pose_change(pos2, pos1): 12 | x1, y1, o1 = pos1 13 | x2, y2, o2 = pos2 14 | 15 | theta = np.arctan2(y2 - y1, x2 - x1) - o1 16 | dist = get_l2_distance(x1, x2, y1, y2) 17 | dx = dist * np.cos(theta) 18 | dy = dist * np.sin(theta) 19 | do = o2 - o1 20 | 21 | return dx, dy, do 22 | 23 | 24 | def get_new_pose(pose, rel_pose_change): 25 | x, y, o = pose 26 | dx, dy, do = rel_pose_change 27 | 28 | global_dx = dx * np.sin(np.deg2rad(o)) + dy * np.cos(np.deg2rad(o)) 29 | global_dy = dx * np.cos(np.deg2rad(o)) - dy * np.sin(np.deg2rad(o)) 30 | x += global_dy 31 | y += global_dx 32 | o += np.rad2deg(do) 33 | if o > 180.: 34 | o -= 360. 35 | 36 | return x, y, o 37 | 38 | 39 | def threshold_poses(coords, shape): 40 | coords[0] = min(max(0, coords[0]), shape[0] - 1) 41 | coords[1] = min(max(0, coords[1]), shape[1] - 1) 42 | return coords 43 | -------------------------------------------------------------------------------- /envs/utils/rotation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utilities for generating and applying rotation matrices. 
17 | """ 18 | import numpy as np 19 | 20 | ANGLE_EPS = 0.001 21 | 22 | 23 | def normalize(v): 24 | return v / np.linalg.norm(v) 25 | 26 | 27 | def get_r_matrix(ax_, angle): 28 | ax = normalize(ax_) 29 | if np.abs(angle) > ANGLE_EPS: 30 | S_hat = np.array( 31 | [[0.0, -ax[2], ax[1]], [ax[2], 0.0, -ax[0]], [-ax[1], ax[0], 0.0]], 32 | dtype=np.float32) 33 | R = np.eye(3) + np.sin(angle) * S_hat + \ 34 | (1 - np.cos(angle)) * (np.linalg.matrix_power(S_hat, 2)) 35 | else: 36 | R = np.eye(3) 37 | return R 38 | 39 | 40 | def r_between(v_from_, v_to_): 41 | v_from = normalize(v_from_) 42 | v_to = normalize(v_to_) 43 | ax = normalize(np.cross(v_from, v_to)) 44 | angle = np.arccos(np.dot(v_from, v_to)) 45 | return get_r_matrix(ax, angle) 46 | 47 | 48 | def rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to): 49 | inputs = [up_from, lookat_from, up_to, lookat_to] 50 | for i in range(4): 51 | inputs[i] = normalize(np.array(inputs[i]).reshape((-1,))) 52 | up_from, lookat_from, up_to, lookat_to = inputs 53 | r1 = r_between(lookat_from, lookat_to) 54 | 55 | new_x = np.dot(r1, np.array([1, 0, 0]).reshape((-1, 1))).reshape((-1)) 56 | to_x = normalize(np.cross(lookat_to, up_to)) 57 | angle = np.arccos(np.dot(new_x, to_x)) 58 | if angle > ANGLE_EPS: 59 | if angle < np.pi - ANGLE_EPS: 60 | ax = normalize(np.cross(new_x, to_x)) 61 | flip = np.dot(lookat_to, ax) 62 | if flip > 0: 63 | r2 = get_r_matrix(lookat_to, angle) 64 | elif flip < 0: 65 | r2 = get_r_matrix(lookat_to, -1. * angle) 66 | else: 67 | # Angle of rotation is too close to 180 degrees, direction of 68 | # rotation does not matter. 69 | r2 = get_r_matrix(lookat_to, angle) 70 | else: 71 | r2 = np.eye(3) 72 | return np.dot(r2, r1) 73 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from collections import deque, defaultdict 2 | import os 3 | import logging 4 | import time 5 | import json 6 | import gym 7 | import torch.nn as nn 8 | import torch 9 | import numpy as np 10 | 11 | from model import RL_Policy, Semantic_Mapping 12 | from utils.storage import GlobalRolloutStorage 13 | from envs import make_vec_envs 14 | from arguments import get_args 15 | import algo 16 | 17 | os.environ["OMP_NUM_THREADS"] = "1" 18 | 19 | 20 | def main(): 21 | args = get_args() 22 | 23 | np.random.seed(args.seed) 24 | torch.manual_seed(args.seed) 25 | 26 | if args.cuda: 27 | torch.cuda.manual_seed(args.seed) 28 | 29 | # Setup Logging 30 | log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name) 31 | dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name) 32 | 33 | if not os.path.exists(log_dir): 34 | os.makedirs(log_dir) 35 | if not os.path.exists(dump_dir): 36 | os.makedirs(dump_dir) 37 | 38 | logging.basicConfig( 39 | filename=log_dir + 'train.log', 40 | level=logging.INFO) 41 | print("Dumping at {}".format(log_dir)) 42 | print(args) 43 | logging.info(args) 44 | 45 | # Logging and loss variables 46 | num_scenes = args.num_processes 47 | num_episodes = int(args.num_eval_episodes) 48 | device = args.device = torch.device("cuda:0" if args.cuda else "cpu") 49 | 50 | g_masks = torch.ones(num_scenes).float().to(device) 51 | 52 | best_g_reward = -np.inf 53 | 54 | if args.eval: 55 | episode_success = [] 56 | episode_spl = [] 57 | episode_dist = [] 58 | for _ in range(args.num_processes): 59 | episode_success.append(deque(maxlen=num_episodes)) 60 | episode_spl.append(deque(maxlen=num_episodes)) 61 | 
episode_dist.append(deque(maxlen=num_episodes))
62 | 
63 |     else:
64 |         episode_success = deque(maxlen=1000)
65 |         episode_spl = deque(maxlen=1000)
66 |         episode_dist = deque(maxlen=1000)
67 | 
68 |     finished = np.zeros((args.num_processes))
69 |     wait_env = np.zeros((args.num_processes))
70 | 
71 |     g_episode_rewards = deque(maxlen=1000)
72 | 
73 |     g_value_losses = deque(maxlen=1000)
74 |     g_action_losses = deque(maxlen=1000)
75 |     g_dist_entropies = deque(maxlen=1000)
76 | 
77 |     per_step_g_rewards = deque(maxlen=1000)
78 | 
79 |     g_process_rewards = np.zeros((num_scenes))
80 | 
81 |     # Starting environments
82 |     torch.set_num_threads(1)
83 |     envs = make_vec_envs(args)
84 |     obs, infos = envs.reset()
85 | 
86 |     torch.set_grad_enabled(False)
87 | 
88 |     # Initialize map variables:
89 |     # Full map consists of multiple channels containing the following:
90 |     # 1. Obstacle Map
91 |     # 2. Explored Area
92 |     # 3. Current Agent Location
93 |     # 4. Past Agent Locations
94 |     # 5,6,7,.. : Semantic Categories
95 |     nc = args.num_sem_categories + 4  # num channels
96 | 
97 |     # Calculating full and local map sizes
98 |     map_size = args.map_size_cm // args.map_resolution
99 |     full_w, full_h = map_size, map_size
100 |     local_w = int(full_w / args.global_downscaling)
101 |     local_h = int(full_h / args.global_downscaling)
102 | 
103 |     # Initializing full and local map
104 |     full_map = torch.zeros(num_scenes, nc, full_w, full_h).float().to(device)
105 |     local_map = torch.zeros(num_scenes, nc, local_w,
106 |                             local_h).float().to(device)
107 | 
108 |     # Initial full and local pose
109 |     full_pose = torch.zeros(num_scenes, 3).float().to(device)
110 |     local_pose = torch.zeros(num_scenes, 3).float().to(device)
111 | 
112 |     # Origin of local map
113 |     origins = np.zeros((num_scenes, 3))
114 | 
115 |     # Local Map Boundaries
116 |     lmb = np.zeros((num_scenes, 4)).astype(int)
117 | 
118 |     # Planner pose inputs have 7 dimensions
119 |     # 1-3 store continuous global agent location
120 |     # 4-7 store local map boundaries
121 |     planner_pose_inputs = np.zeros((num_scenes, 7))
122 | 
123 |     def get_local_map_boundaries(agent_loc, local_sizes, full_sizes):
124 |         loc_r, loc_c = agent_loc
125 |         local_w, local_h = local_sizes
126 |         full_w, full_h = full_sizes
127 | 
128 |         if args.global_downscaling > 1:
129 |             gx1, gy1 = loc_r - local_w // 2, loc_c - local_h // 2
130 |             gx2, gy2 = gx1 + local_w, gy1 + local_h
131 |             if gx1 < 0:
132 |                 gx1, gx2 = 0, local_w
133 |             if gx2 > full_w:
134 |                 gx1, gx2 = full_w - local_w, full_w
135 | 
136 |             if gy1 < 0:
137 |                 gy1, gy2 = 0, local_h
138 |             if gy2 > full_h:
139 |                 gy1, gy2 = full_h - local_h, full_h
140 |         else:
141 |             gx1, gx2, gy1, gy2 = 0, full_w, 0, full_h
142 | 
143 |         return [gx1, gx2, gy1, gy2]
144 | 
145 |     def init_map_and_pose():
146 |         full_map.fill_(0.)
147 |         full_pose.fill_(0.)
148 |         full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0
149 | 
150 |         locs = full_pose.cpu().numpy()
151 |         planner_pose_inputs[:, :3] = locs
152 |         for e in range(num_scenes):
153 |             r, c = locs[e, 1], locs[e, 0]
154 |             loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
155 |                             int(c * 100.0 / args.map_resolution)]
156 | 
157 |             full_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0
158 | 
159 |             lmb[e] = get_local_map_boundaries((loc_r, loc_c),
160 |                                               (local_w, local_h),
161 |                                               (full_w, full_h))
162 | 
163 |             planner_pose_inputs[e, 3:] = lmb[e]
164 |             origins[e] = [lmb[e][2] * args.map_resolution / 100.0,
165 |                           lmb[e][0] * args.map_resolution / 100.0, 0.]
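        # Worked example (a sketch assuming the repo's default arguments
        # map_size_cm=2400, map_resolution=5, global_downscaling=2, i.e. a
        # 480x480 full map and a 240x240 local map): the agent starts at the
        # map center, so loc_r = loc_c = int(12.0 * 100 / 5) = 240,
        # get_local_map_boundaries returns lmb = [120, 360, 120, 360], and
        # origins[e] = [6.0, 6.0, 0.]. The local map thus covers the central
        # 12m x 12m of the 24m x 24m full map, and local_pose below becomes
        # (6.0, 6.0, 0), the center of the local window.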
166 | 167 | for e in range(num_scenes): 168 | local_map[e] = full_map[e, :, 169 | lmb[e, 0]:lmb[e, 1], 170 | lmb[e, 2]:lmb[e, 3]] 171 | local_pose[e] = full_pose[e] - \ 172 | torch.from_numpy(origins[e]).to(device).float() 173 | 174 | def init_map_and_pose_for_env(e): 175 | full_map[e].fill_(0.) 176 | full_pose[e].fill_(0.) 177 | full_pose[e, :2] = args.map_size_cm / 100.0 / 2.0 178 | 179 | locs = full_pose[e].cpu().numpy() 180 | planner_pose_inputs[e, :3] = locs 181 | r, c = locs[1], locs[0] 182 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution), 183 | int(c * 100.0 / args.map_resolution)] 184 | 185 | full_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0 186 | 187 | lmb[e] = get_local_map_boundaries((loc_r, loc_c), 188 | (local_w, local_h), 189 | (full_w, full_h)) 190 | 191 | planner_pose_inputs[e, 3:] = lmb[e] 192 | origins[e] = [lmb[e][2] * args.map_resolution / 100.0, 193 | lmb[e][0] * args.map_resolution / 100.0, 0.] 194 | 195 | local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] 196 | local_pose[e] = full_pose[e] - \ 197 | torch.from_numpy(origins[e]).to(device).float() 198 | 199 | def update_intrinsic_rew(e): 200 | prev_explored_area = full_map[e, 1].sum(1).sum(0) 201 | full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \ 202 | local_map[e] 203 | curr_explored_area = full_map[e, 1].sum(1).sum(0) 204 | intrinsic_rews[e] = curr_explored_area - prev_explored_area 205 | intrinsic_rews[e] *= (args.map_resolution / 100.)**2 # to m^2 206 | 207 | init_map_and_pose() 208 | 209 | # Global policy observation space 210 | ngc = 8 + args.num_sem_categories 211 | es = 2 212 | g_observation_space = gym.spaces.Box(0, 1, 213 | (ngc, 214 | local_w, 215 | local_h), dtype='uint8') 216 | 217 | # Global policy action space 218 | g_action_space = gym.spaces.Box(low=0.0, high=0.99, 219 | shape=(2,), dtype=np.float32) 220 | 221 | # Global policy recurrent layer size 222 | g_hidden_size = args.global_hidden_size 223 | 224 | # Semantic Mapping 225 | sem_map_module = Semantic_Mapping(args).to(device) 226 | sem_map_module.eval() 227 | 228 | # Global policy 229 | g_policy = RL_Policy(g_observation_space.shape, g_action_space, 230 | model_type=1, 231 | base_kwargs={'recurrent': args.use_recurrent_global, 232 | 'hidden_size': g_hidden_size, 233 | 'num_sem_categories': ngc - 8 234 | }).to(device) 235 | g_agent = algo.PPO(g_policy, args.clip_param, args.ppo_epoch, 236 | args.num_mini_batch, args.value_loss_coef, 237 | args.entropy_coef, lr=args.lr, eps=args.eps, 238 | max_grad_norm=args.max_grad_norm) 239 | 240 | global_input = torch.zeros(num_scenes, ngc, local_w, local_h) 241 | global_orientation = torch.zeros(num_scenes, 1).long() 242 | intrinsic_rews = torch.zeros(num_scenes).to(device) 243 | extras = torch.zeros(num_scenes, 2) 244 | 245 | # Storage 246 | g_rollouts = GlobalRolloutStorage(args.num_global_steps, 247 | num_scenes, g_observation_space.shape, 248 | g_action_space, g_policy.rec_state_size, 249 | es).to(device) 250 | 251 | if args.load != "0": 252 | print("Loading model {}".format(args.load)) 253 | state_dict = torch.load(args.load, 254 | map_location=lambda storage, loc: storage) 255 | g_policy.load_state_dict(state_dict) 256 | 257 | if args.eval: 258 | g_policy.eval() 259 | 260 | # Predict semantic map from frame 1 261 | poses = torch.from_numpy(np.asarray( 262 | [infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes)]) 263 | ).float().to(device) 264 | 265 | _, local_map, _, local_pose = \ 266 | sem_map_module(obs, poses, local_map, local_pose) 267 | 
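    # Channel layout of the global policy input assembled below (a summary
    # of the assignments that follow, using the local/full map convention
    # from the comments above):
    #   0-3 : local obstacle map, explored area, current and past agent
    #         locations (local_map channels 0-3)
    #   4-7 : the same four channels of the full map, max-pooled by
    #         args.global_downscaling down to local map size
    #   8-  : one channel per semantic category (local_map channels 4 on)
    # extras carries the discretized orientation and the goal category id.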
268 | # Compute Global policy input 269 | locs = local_pose.cpu().numpy() 270 | global_input = torch.zeros(num_scenes, ngc, local_w, local_h) 271 | global_orientation = torch.zeros(num_scenes, 1).long() 272 | 273 | for e in range(num_scenes): 274 | r, c = locs[e, 1], locs[e, 0] 275 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution), 276 | int(c * 100.0 / args.map_resolution)] 277 | 278 | local_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1. 279 | global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) 280 | 281 | global_input[:, 0:4, :, :] = local_map[:, 0:4, :, :].detach() 282 | global_input[:, 4:8, :, :] = nn.MaxPool2d(args.global_downscaling)( 283 | full_map[:, 0:4, :, :]) 284 | global_input[:, 8:, :, :] = local_map[:, 4:, :, :].detach() 285 | goal_cat_id = torch.from_numpy(np.asarray( 286 | [infos[env_idx]['goal_cat_id'] for env_idx 287 | in range(num_scenes)])) 288 | 289 | extras = torch.zeros(num_scenes, 2) 290 | extras[:, 0] = global_orientation[:, 0] 291 | extras[:, 1] = goal_cat_id 292 | 293 | g_rollouts.obs[0].copy_(global_input) 294 | g_rollouts.extras[0].copy_(extras) 295 | 296 | # Run Global Policy (global_goals = Long-Term Goal) 297 | g_value, g_action, g_action_log_prob, g_rec_states = \ 298 | g_policy.act( 299 | g_rollouts.obs[0], 300 | g_rollouts.rec_states[0], 301 | g_rollouts.masks[0], 302 | extras=g_rollouts.extras[0], 303 | deterministic=False 304 | ) 305 | 306 | cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() 307 | global_goals = [[int(action[0] * local_w), int(action[1] * local_h)] 308 | for action in cpu_actions] 309 | global_goals = [[min(x, int(local_w - 1)), min(y, int(local_h - 1))] 310 | for x, y in global_goals] 311 | 312 | goal_maps = [np.zeros((local_w, local_h)) for _ in range(num_scenes)] 313 | 314 | for e in range(num_scenes): 315 | goal_maps[e][global_goals[e][0], global_goals[e][1]] = 1 316 | 317 | planner_inputs = [{} for e in range(num_scenes)] 318 | for e, p_input in enumerate(planner_inputs): 319 | p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy() 320 | p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy() 321 | p_input['pose_pred'] = planner_pose_inputs[e] 322 | p_input['goal'] = goal_maps[e] # global_goals[e] 323 | p_input['new_goal'] = 1 324 | p_input['found_goal'] = 0 325 | p_input['wait'] = wait_env[e] or finished[e] 326 | if args.visualize or args.print_images: 327 | local_map[e, -1, :, :] = 1e-5 328 | p_input['sem_map_pred'] = local_map[e, 4:, :, : 329 | ].argmax(0).cpu().numpy() 330 | 331 | obs, _, done, infos = envs.plan_act_and_preprocess(planner_inputs) 332 | 333 | start = time.time() 334 | g_reward = 0 335 | 336 | torch.set_grad_enabled(False) 337 | spl_per_category = defaultdict(list) 338 | success_per_category = defaultdict(list) 339 | 340 | for step in range(args.num_training_frames // args.num_processes + 1): 341 | if finished.sum() == args.num_processes: 342 | break 343 | 344 | g_step = (step // args.num_local_steps) % args.num_global_steps 345 | l_step = step % args.num_local_steps 346 | 347 | # ------------------------------------------------------------------ 348 | # Reinitialize variables when episode ends 349 | l_masks = torch.FloatTensor([0 if x else 1 350 | for x in done]).to(device) 351 | g_masks *= l_masks 352 | 353 | for e, x in enumerate(done): 354 | if x: 355 | spl = infos[e]['spl'] 356 | success = infos[e]['success'] 357 | dist = infos[e]['distance_to_goal'] 358 | spl_per_category[infos[e]['goal_name']].append(spl) 359 | success_per_category[infos[e]['goal_name']].append(success) 360 | if 
args.eval: 361 | episode_success[e].append(success) 362 | episode_spl[e].append(spl) 363 | episode_dist[e].append(dist) 364 | if len(episode_success[e]) == num_episodes: 365 | finished[e] = 1 366 | else: 367 | episode_success.append(success) 368 | episode_spl.append(spl) 369 | episode_dist.append(dist) 370 | wait_env[e] = 1. 371 | update_intrinsic_rew(e) 372 | init_map_and_pose_for_env(e) 373 | # ------------------------------------------------------------------ 374 | 375 | # ------------------------------------------------------------------ 376 | # Semantic Mapping Module 377 | poses = torch.from_numpy(np.asarray( 378 | [infos[env_idx]['sensor_pose'] for env_idx 379 | in range(num_scenes)]) 380 | ).float().to(device) 381 | 382 | _, local_map, _, local_pose = \ 383 | sem_map_module(obs, poses, local_map, local_pose) 384 | 385 | locs = local_pose.cpu().numpy() 386 | planner_pose_inputs[:, :3] = locs + origins 387 | local_map[:, 2, :, :].fill_(0.) # Resetting current location channel 388 | for e in range(num_scenes): 389 | r, c = locs[e, 1], locs[e, 0] 390 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution), 391 | int(c * 100.0 / args.map_resolution)] 392 | local_map[e, 2:4, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1. 393 | 394 | # ------------------------------------------------------------------ 395 | 396 | # ------------------------------------------------------------------ 397 | # Global Policy 398 | if l_step == args.num_local_steps - 1: 399 | # For every global step, update the full and local maps 400 | for e in range(num_scenes): 401 | if wait_env[e] == 1: # New episode 402 | wait_env[e] = 0. 403 | else: 404 | update_intrinsic_rew(e) 405 | 406 | full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \ 407 | local_map[e] 408 | full_pose[e] = local_pose[e] + \ 409 | torch.from_numpy(origins[e]).to(device).float() 410 | 411 | locs = full_pose[e].cpu().numpy() 412 | r, c = locs[1], locs[0] 413 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution), 414 | int(c * 100.0 / args.map_resolution)] 415 | 416 | lmb[e] = get_local_map_boundaries((loc_r, loc_c), 417 | (local_w, local_h), 418 | (full_w, full_h)) 419 | 420 | planner_pose_inputs[e, 3:] = lmb[e] 421 | origins[e] = [lmb[e][2] * args.map_resolution / 100.0, 422 | lmb[e][0] * args.map_resolution / 100.0, 0.] 423 | 424 | local_map[e] = full_map[e, :, 425 | lmb[e, 0]:lmb[e, 1], 426 | lmb[e, 2]:lmb[e, 3]] 427 | local_pose[e] = full_pose[e] - \ 428 | torch.from_numpy(origins[e]).to(device).float() 429 | 430 | locs = local_pose.cpu().numpy() 431 | for e in range(num_scenes): 432 | global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) 
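                # The heading is discretized into 5-degree bins over
                # [-180, 180), i.e. 72 classes, matching the policy's
                # nn.Embedding(72, 8) orientation embedding. For a
                # hypothetical heading locs[e, 2] = -37.2 degrees:
                # int((-37.2 + 180.0) / 5.) = 28.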
433 | global_input[:, 0:4, :, :] = local_map[:, 0:4, :, :] 434 | global_input[:, 4:8, :, :] = \ 435 | nn.MaxPool2d(args.global_downscaling)( 436 | full_map[:, 0:4, :, :]) 437 | global_input[:, 8:, :, :] = local_map[:, 4:, :, :].detach() 438 | goal_cat_id = torch.from_numpy(np.asarray( 439 | [infos[env_idx]['goal_cat_id'] for env_idx 440 | in range(num_scenes)])) 441 | extras[:, 0] = global_orientation[:, 0] 442 | extras[:, 1] = goal_cat_id 443 | 444 | # Get exploration reward and metrics 445 | g_reward = torch.from_numpy(np.asarray( 446 | [infos[env_idx]['g_reward'] for env_idx in range(num_scenes)]) 447 | ).float().to(device) 448 | g_reward += args.intrinsic_rew_coeff * intrinsic_rews.detach() 449 | 450 | g_process_rewards += g_reward.cpu().numpy() 451 | g_total_rewards = g_process_rewards * \ 452 | (1 - g_masks.cpu().numpy()) 453 | g_process_rewards *= g_masks.cpu().numpy() 454 | per_step_g_rewards.append(np.mean(g_reward.cpu().numpy())) 455 | 456 | if np.sum(g_total_rewards) != 0: 457 | for total_rew in g_total_rewards: 458 | if total_rew != 0: 459 | g_episode_rewards.append(total_rew) 460 | 461 | # Add samples to global policy storage 462 | if step == 0: 463 | g_rollouts.obs[0].copy_(global_input) 464 | g_rollouts.extras[0].copy_(extras) 465 | else: 466 | g_rollouts.insert( 467 | global_input, g_rec_states, 468 | g_action, g_action_log_prob, g_value, 469 | g_reward, g_masks, extras 470 | ) 471 | 472 | # Sample long-term goal from global policy 473 | g_value, g_action, g_action_log_prob, g_rec_states = \ 474 | g_policy.act( 475 | g_rollouts.obs[g_step + 1], 476 | g_rollouts.rec_states[g_step + 1], 477 | g_rollouts.masks[g_step + 1], 478 | extras=g_rollouts.extras[g_step + 1], 479 | deterministic=False 480 | ) 481 | cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() 482 | global_goals = [[int(action[0] * local_w), 483 | int(action[1] * local_h)] 484 | for action in cpu_actions] 485 | global_goals = [[min(x, int(local_w - 1)), 486 | min(y, int(local_h - 1))] 487 | for x, y in global_goals] 488 | 489 | g_reward = 0 490 | g_masks = torch.ones(num_scenes).float().to(device) 491 | 492 | # ------------------------------------------------------------------ 493 | 494 | # ------------------------------------------------------------------ 495 | # Update long-term goal if target object is found 496 | found_goal = [0 for _ in range(num_scenes)] 497 | goal_maps = [np.zeros((local_w, local_h)) for _ in range(num_scenes)] 498 | 499 | for e in range(num_scenes): 500 | goal_maps[e][global_goals[e][0], global_goals[e][1]] = 1 501 | 502 | for e in range(num_scenes): 503 | cn = infos[e]['goal_cat_id'] + 4 504 | if local_map[e, cn, :, :].sum() != 0.: 505 | cat_semantic_map = local_map[e, cn, :, :].cpu().numpy() 506 | cat_semantic_scores = cat_semantic_map 507 | cat_semantic_scores[cat_semantic_scores > 0] = 1. 
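                # Once the goal category channel (cn = goal_cat_id + 4)
                # contains any mass, the thresholded category mask replaces
                # the single-cell long-term goal and found_goal is set, so
                # the local planner heads for the detected object instead of
                # the exploration goal sampled by the global policy.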
508 | goal_maps[e] = cat_semantic_scores 509 | found_goal[e] = 1 510 | # ------------------------------------------------------------------ 511 | 512 | # ------------------------------------------------------------------ 513 | # Take action and get next observation 514 | planner_inputs = [{} for e in range(num_scenes)] 515 | for e, p_input in enumerate(planner_inputs): 516 | p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy() 517 | p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy() 518 | p_input['pose_pred'] = planner_pose_inputs[e] 519 | p_input['goal'] = goal_maps[e] # global_goals[e] 520 | p_input['new_goal'] = l_step == args.num_local_steps - 1 521 | p_input['found_goal'] = found_goal[e] 522 | p_input['wait'] = wait_env[e] or finished[e] 523 | if args.visualize or args.print_images: 524 | local_map[e, -1, :, :] = 1e-5 525 | p_input['sem_map_pred'] = local_map[e, 4:, :, 526 | :].argmax(0).cpu().numpy() 527 | 528 | obs, _, done, infos = envs.plan_act_and_preprocess(planner_inputs) 529 | # ------------------------------------------------------------------ 530 | 531 | # ------------------------------------------------------------------ 532 | # Training 533 | torch.set_grad_enabled(True) 534 | if g_step % args.num_global_steps == args.num_global_steps - 1 \ 535 | and l_step == args.num_local_steps - 1: 536 | if not args.eval: 537 | g_next_value = g_policy.get_value( 538 | g_rollouts.obs[-1], 539 | g_rollouts.rec_states[-1], 540 | g_rollouts.masks[-1], 541 | extras=g_rollouts.extras[-1] 542 | ).detach() 543 | 544 | g_rollouts.compute_returns(g_next_value, args.use_gae, 545 | args.gamma, args.tau) 546 | g_value_loss, g_action_loss, g_dist_entropy = \ 547 | g_agent.update(g_rollouts) 548 | g_value_losses.append(g_value_loss) 549 | g_action_losses.append(g_action_loss) 550 | g_dist_entropies.append(g_dist_entropy) 551 | g_rollouts.after_update() 552 | 553 | torch.set_grad_enabled(False) 554 | # ------------------------------------------------------------------ 555 | 556 | # ------------------------------------------------------------------ 557 | # Logging 558 | if step % args.log_interval == 0: 559 | end = time.time() 560 | time_elapsed = time.gmtime(end - start) 561 | log = " ".join([ 562 | "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1), 563 | "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)), 564 | "num timesteps {},".format(step * num_scenes), 565 | "FPS {},".format(int(step * num_scenes / (end - start))) 566 | ]) 567 | 568 | log += "\n\tRewards:" 569 | 570 | if len(g_episode_rewards) > 0: 571 | log += " ".join([ 572 | " Global step mean/med rew:", 573 | "{:.4f}/{:.4f},".format( 574 | np.mean(per_step_g_rewards), 575 | np.median(per_step_g_rewards)), 576 | " Global eps mean/med/min/max eps rew:", 577 | "{:.3f}/{:.3f}/{:.3f}/{:.3f},".format( 578 | np.mean(g_episode_rewards), 579 | np.median(g_episode_rewards), 580 | np.min(g_episode_rewards), 581 | np.max(g_episode_rewards)) 582 | ]) 583 | 584 | if args.eval: 585 | total_success = [] 586 | total_spl = [] 587 | total_dist = [] 588 | for e in range(args.num_processes): 589 | for acc in episode_success[e]: 590 | total_success.append(acc) 591 | for dist in episode_dist[e]: 592 | total_dist.append(dist) 593 | for spl in episode_spl[e]: 594 | total_spl.append(spl) 595 | 596 | if len(total_spl) > 0: 597 | log += " ObjectNav succ/spl/dtg:" 598 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( 599 | np.mean(total_success), 600 | np.mean(total_spl), 601 | np.mean(total_dist), 602 | len(total_spl)) 603 | else: 604 | if 
len(episode_success) > 100: 605 | log += " ObjectNav succ/spl/dtg:" 606 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( 607 | np.mean(episode_success), 608 | np.mean(episode_spl), 609 | np.mean(episode_dist), 610 | len(episode_spl)) 611 | 612 | log += "\n\tLosses:" 613 | if len(g_value_losses) > 0 and not args.eval: 614 | log += " ".join([ 615 | " Policy Loss value/action/dist:", 616 | "{:.3f}/{:.3f}/{:.3f},".format( 617 | np.mean(g_value_losses), 618 | np.mean(g_action_losses), 619 | np.mean(g_dist_entropies)) 620 | ]) 621 | 622 | print(log) 623 | logging.info(log) 624 | # ------------------------------------------------------------------ 625 | 626 | # ------------------------------------------------------------------ 627 | # Save best models 628 | if (step * num_scenes) % args.save_interval < \ 629 | num_scenes: 630 | if len(g_episode_rewards) >= 1000 and \ 631 | (np.mean(g_episode_rewards) >= best_g_reward) \ 632 | and not args.eval: 633 | torch.save(g_policy.state_dict(), 634 | os.path.join(log_dir, "model_best.pth")) 635 | best_g_reward = np.mean(g_episode_rewards) 636 | 637 | # Save periodic models 638 | if (step * num_scenes) % args.save_periodic < \ 639 | num_scenes: 640 | total_steps = step * num_scenes 641 | if not args.eval: 642 | torch.save(g_policy.state_dict(), 643 | os.path.join(dump_dir, 644 | "periodic_{}.pth".format(total_steps))) 645 | # ------------------------------------------------------------------ 646 | 647 | # Print and save model performance numbers during evaluation 648 | if args.eval: 649 | print("Dumping eval details...") 650 | 651 | total_success = [] 652 | total_spl = [] 653 | total_dist = [] 654 | for e in range(args.num_processes): 655 | for acc in episode_success[e]: 656 | total_success.append(acc) 657 | for dist in episode_dist[e]: 658 | total_dist.append(dist) 659 | for spl in episode_spl[e]: 660 | total_spl.append(spl) 661 | 662 | if len(total_spl) > 0: 663 | log = "Final ObjectNav succ/spl/dtg:" 664 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( 665 | np.mean(total_success), 666 | np.mean(total_spl), 667 | np.mean(total_dist), 668 | len(total_spl)) 669 | 670 | print(log) 671 | logging.info(log) 672 | 673 | # Save the spl per category 674 | log = "Success | SPL per category\n" 675 | for key in success_per_category: 676 | log += "{}: {} | {}\n".format(key, 677 | sum(success_per_category[key]) / 678 | len(success_per_category[key]), 679 | sum(spl_per_category[key]) / 680 | len(spl_per_category[key])) 681 | 682 | print(log) 683 | logging.info(log) 684 | 685 | with open('{}/{}_spl_per_cat_pred_thr.json'.format( 686 | dump_dir, args.split), 'w') as f: 687 | json.dump(spl_per_category, f) 688 | 689 | with open('{}/{}_success_per_cat_pred_thr.json'.format( 690 | dump_dir, args.split), 'w') as f: 691 | json.dump(success_per_category, f) 692 | 693 | 694 | if __name__ == "__main__": 695 | main() 696 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import functional as F 4 | import numpy as np 5 | 6 | from utils.distributions import Categorical, DiagGaussian 7 | from utils.model import get_grid, ChannelPool, Flatten, NNBase 8 | import envs.utils.depth_utils as du 9 | 10 | 11 | class Goal_Oriented_Semantic_Policy(NNBase): 12 | 13 | def __init__(self, input_shape, recurrent=False, hidden_size=512, 14 | num_sem_categories=16): 15 | super(Goal_Oriented_Semantic_Policy, 
self).__init__( 16 | recurrent, hidden_size, hidden_size) 17 | 18 | out_size = int(input_shape[1] / 16.) * int(input_shape[2] / 16.) 19 | 20 | self.main = nn.Sequential( 21 | nn.MaxPool2d(2), 22 | nn.Conv2d(num_sem_categories + 8, 32, 3, stride=1, padding=1), 23 | nn.ReLU(), 24 | nn.MaxPool2d(2), 25 | nn.Conv2d(32, 64, 3, stride=1, padding=1), 26 | nn.ReLU(), 27 | nn.MaxPool2d(2), 28 | nn.Conv2d(64, 128, 3, stride=1, padding=1), 29 | nn.ReLU(), 30 | nn.MaxPool2d(2), 31 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 32 | nn.ReLU(), 33 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 34 | nn.ReLU(), 35 | Flatten() 36 | ) 37 | 38 | self.linear1 = nn.Linear(out_size * 32 + 8 * 2, hidden_size) 39 | self.linear2 = nn.Linear(hidden_size, 256) 40 | self.critic_linear = nn.Linear(256, 1) 41 | self.orientation_emb = nn.Embedding(72, 8) 42 | self.goal_emb = nn.Embedding(num_sem_categories, 8) 43 | self.train() 44 | 45 | def forward(self, inputs, rnn_hxs, masks, extras): 46 | x = self.main(inputs) 47 | orientation_emb = self.orientation_emb(extras[:, 0]) 48 | goal_emb = self.goal_emb(extras[:, 1]) 49 | 50 | x = torch.cat((x, orientation_emb, goal_emb), 1) 51 | 52 | x = nn.ReLU()(self.linear1(x)) 53 | if self.is_recurrent: 54 | x, rnn_hxs = self._forward_gru(x, rnn_hxs, masks) 55 | 56 | x = nn.ReLU()(self.linear2(x)) 57 | 58 | return self.critic_linear(x).squeeze(-1), x, rnn_hxs 59 | 60 | 61 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L15 62 | class RL_Policy(nn.Module): 63 | 64 | def __init__(self, obs_shape, action_space, model_type=0, 65 | base_kwargs=None): 66 | 67 | super(RL_Policy, self).__init__() 68 | if base_kwargs is None: 69 | base_kwargs = {} 70 | 71 | if model_type == 1: 72 | self.network = Goal_Oriented_Semantic_Policy( 73 | obs_shape, **base_kwargs) 74 | else: 75 | raise NotImplementedError 76 | 77 | if action_space.__class__.__name__ == "Discrete": 78 | num_outputs = action_space.n 79 | self.dist = Categorical(self.network.output_size, num_outputs) 80 | elif action_space.__class__.__name__ == "Box": 81 | num_outputs = action_space.shape[0] 82 | self.dist = DiagGaussian(self.network.output_size, num_outputs) 83 | else: 84 | raise NotImplementedError 85 | 86 | self.model_type = model_type 87 | 88 | @property 89 | def is_recurrent(self): 90 | return self.network.is_recurrent 91 | 92 | @property 93 | def rec_state_size(self): 94 | """Size of rnn_hx.""" 95 | return self.network.rec_state_size 96 | 97 | def forward(self, inputs, rnn_hxs, masks, extras): 98 | if extras is None: 99 | return self.network(inputs, rnn_hxs, masks) 100 | else: 101 | return self.network(inputs, rnn_hxs, masks, extras) 102 | 103 | def act(self, inputs, rnn_hxs, masks, extras=None, deterministic=False): 104 | 105 | value, actor_features, rnn_hxs = self(inputs, rnn_hxs, masks, extras) 106 | dist = self.dist(actor_features) 107 | 108 | if deterministic: 109 | action = dist.mode() 110 | else: 111 | action = dist.sample() 112 | 113 | action_log_probs = dist.log_probs(action) 114 | 115 | return value, action, action_log_probs, rnn_hxs 116 | 117 | def get_value(self, inputs, rnn_hxs, masks, extras=None): 118 | value, _, _ = self(inputs, rnn_hxs, masks, extras) 119 | return value 120 | 121 | def evaluate_actions(self, inputs, rnn_hxs, masks, action, extras=None): 122 | 123 | value, actor_features, rnn_hxs = self(inputs, rnn_hxs, masks, extras) 124 | dist = self.dist(actor_features) 125 | 126 | action_log_probs = dist.log_probs(action) 127 | dist_entropy = dist.entropy().mean() 
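        # evaluate_actions is what algo.PPO calls during an update: the
        # re-evaluated log-probs are compared with the stored ones to build
        # the clipped surrogate loss, roughly (a sketch of the standard
        # clipped PPO objective, not code from this file):
        #
        #     ratio = torch.exp(new_log_probs - old_log_probs)
        #     surr1 = ratio * adv
        #     surr2 = torch.clamp(ratio, 1 - clip_param, 1 + clip_param) * adv
        #     action_loss = -torch.min(surr1, surr2).mean()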
128 | 129 | return value, action_log_probs, dist_entropy, rnn_hxs 130 | 131 | 132 | class Semantic_Mapping(nn.Module): 133 | 134 | """ 135 | Semantic_Mapping 136 | """ 137 | 138 | def __init__(self, args): 139 | super(Semantic_Mapping, self).__init__() 140 | 141 | self.device = args.device 142 | self.screen_h = args.frame_height 143 | self.screen_w = args.frame_width 144 | self.resolution = args.map_resolution 145 | self.z_resolution = args.map_resolution 146 | self.map_size_cm = args.map_size_cm // args.global_downscaling 147 | self.n_channels = 3 148 | self.vision_range = args.vision_range 149 | self.dropout = 0.5 150 | self.fov = args.hfov 151 | self.du_scale = args.du_scale 152 | self.cat_pred_threshold = args.cat_pred_threshold 153 | self.exp_pred_threshold = args.exp_pred_threshold 154 | self.map_pred_threshold = args.map_pred_threshold 155 | self.num_sem_categories = args.num_sem_categories 156 | 157 | self.max_height = int(360 / self.z_resolution) 158 | self.min_height = int(-40 / self.z_resolution) 159 | self.agent_height = args.camera_height * 100. 160 | self.shift_loc = [self.vision_range * 161 | self.resolution // 2, 0, np.pi / 2.0] 162 | self.camera_matrix = du.get_camera_matrix( 163 | self.screen_w, self.screen_h, self.fov) 164 | 165 | self.pool = ChannelPool(1) 166 | 167 | vr = self.vision_range 168 | 169 | self.init_grid = torch.zeros( 170 | args.num_processes, 1 + self.num_sem_categories, vr, vr, 171 | self.max_height - self.min_height 172 | ).float().to(self.device) 173 | self.feat = torch.ones( 174 | args.num_processes, 1 + self.num_sem_categories, 175 | self.screen_h // self.du_scale * self.screen_w // self.du_scale 176 | ).float().to(self.device) 177 | 178 | def forward(self, obs, pose_obs, maps_last, poses_last): 179 | bs, c, h, w = obs.size() 180 | depth = obs[:, 3, :, :] 181 | 182 | point_cloud_t = du.get_point_cloud_from_z_t( 183 | depth, self.camera_matrix, self.device, scale=self.du_scale) 184 | 185 | agent_view_t = du.transform_camera_view_t( 186 | point_cloud_t, self.agent_height, 0, self.device) 187 | 188 | agent_view_centered_t = du.transform_pose_t( 189 | agent_view_t, self.shift_loc, self.device) 190 | 191 | max_h = self.max_height 192 | min_h = self.min_height 193 | xy_resolution = self.resolution 194 | z_resolution = self.z_resolution 195 | vision_range = self.vision_range 196 | XYZ_cm_std = agent_view_centered_t.float() 197 | XYZ_cm_std[..., :2] = (XYZ_cm_std[..., :2] / xy_resolution) 198 | XYZ_cm_std[..., :2] = (XYZ_cm_std[..., :2] - 199 | vision_range // 2.) / vision_range * 2. 200 | XYZ_cm_std[..., 2] = XYZ_cm_std[..., 2] / z_resolution 201 | XYZ_cm_std[..., 2] = (XYZ_cm_std[..., 2] - 202 | (max_h + min_h) // 2.) / (max_h - min_h) * 2. 
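        # XYZ_cm_std is now in the [-1, 1] convention splat_feat_nd expects:
        # along x/y, a point at d cm maps to
        # 2 * (d / resolution - vision_range // 2) / vision_range.
        # E.g., assuming the default vision_range=100 and resolution=5,
        # d = 250 cm gives cell 50 and a normalized coordinate of 0.0,
        # the center of the splat grid along that axis.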
203 | self.feat[:, 1:, :] = nn.AvgPool2d(self.du_scale)( 204 | obs[:, 4:, :, :] 205 | ).view(bs, c - 4, h // self.du_scale * w // self.du_scale) 206 | 207 | XYZ_cm_std = XYZ_cm_std.permute(0, 3, 1, 2) 208 | XYZ_cm_std = XYZ_cm_std.view(XYZ_cm_std.shape[0], 209 | XYZ_cm_std.shape[1], 210 | XYZ_cm_std.shape[2] * XYZ_cm_std.shape[3]) 211 | 212 | voxels = du.splat_feat_nd( 213 | self.init_grid * 0., self.feat, XYZ_cm_std).transpose(2, 3) 214 | 215 | min_z = int(25 / z_resolution - min_h) 216 | max_z = int((self.agent_height + 1) / z_resolution - min_h) 217 | 218 | agent_height_proj = voxels[..., min_z:max_z].sum(4) 219 | all_height_proj = voxels.sum(4) 220 | 221 | fp_map_pred = agent_height_proj[:, 0:1, :, :] 222 | fp_exp_pred = all_height_proj[:, 0:1, :, :] 223 | fp_map_pred = fp_map_pred / self.map_pred_threshold 224 | fp_exp_pred = fp_exp_pred / self.exp_pred_threshold 225 | fp_map_pred = torch.clamp(fp_map_pred, min=0.0, max=1.0) 226 | fp_exp_pred = torch.clamp(fp_exp_pred, min=0.0, max=1.0) 227 | 228 | pose_pred = poses_last 229 | 230 | agent_view = torch.zeros(bs, c, 231 | self.map_size_cm // self.resolution, 232 | self.map_size_cm // self.resolution 233 | ).to(self.device) 234 | 235 | x1 = self.map_size_cm // (self.resolution * 2) - self.vision_range // 2 236 | x2 = x1 + self.vision_range 237 | y1 = self.map_size_cm // (self.resolution * 2) 238 | y2 = y1 + self.vision_range 239 | agent_view[:, 0:1, y1:y2, x1:x2] = fp_map_pred 240 | agent_view[:, 1:2, y1:y2, x1:x2] = fp_exp_pred 241 | agent_view[:, 4:, y1:y2, x1:x2] = torch.clamp( 242 | agent_height_proj[:, 1:, :, :] / self.cat_pred_threshold, 243 | min=0.0, max=1.0) 244 | 245 | corrected_pose = pose_obs 246 | 247 | def get_new_pose_batch(pose, rel_pose_change): 248 | 249 | pose[:, 1] += rel_pose_change[:, 0] * \ 250 | torch.sin(pose[:, 2] / 57.29577951308232) \ 251 | + rel_pose_change[:, 1] * \ 252 | torch.cos(pose[:, 2] / 57.29577951308232) 253 | pose[:, 0] += rel_pose_change[:, 0] * \ 254 | torch.cos(pose[:, 2] / 57.29577951308232) \ 255 | - rel_pose_change[:, 1] * \ 256 | torch.sin(pose[:, 2] / 57.29577951308232) 257 | pose[:, 2] += rel_pose_change[:, 2] * 57.29577951308232 258 | 259 | pose[:, 2] = torch.fmod(pose[:, 2] - 180.0, 360.0) + 180.0 260 | pose[:, 2] = torch.fmod(pose[:, 2] + 180.0, 360.0) - 180.0 261 | 262 | return pose 263 | 264 | current_poses = get_new_pose_batch(poses_last, corrected_pose) 265 | st_pose = current_poses.clone().detach() 266 | 267 | st_pose[:, :2] = - (st_pose[:, :2] 268 | * 100.0 / self.resolution 269 | - self.map_size_cm // (self.resolution * 2)) /\ 270 | (self.map_size_cm // (self.resolution * 2)) 271 | st_pose[:, 2] = 90. 
- (st_pose[:, 2]) 272 | 273 | rot_mat, trans_mat = get_grid(st_pose, agent_view.size(), 274 | self.device) 275 | 276 | rotated = F.grid_sample(agent_view, rot_mat, align_corners=True) 277 | translated = F.grid_sample(rotated, trans_mat, align_corners=True) 278 | 279 | maps2 = torch.cat((maps_last.unsqueeze(1), translated.unsqueeze(1)), 1) 280 | 281 | map_pred, _ = torch.max(maps2, 1) 282 | 283 | return fp_map_pred, map_pred, pose_pred, current_poses 284 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-fmm==2019.1.30 2 | scikit-learn==0.22.2.post1 3 | scikit-image==0.15.0 4 | numpy>=1.20.2 5 | ifcfg 6 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | 5 | from envs import make_vec_envs 6 | from arguments import get_args 7 | 8 | os.environ["OMP_NUM_THREADS"] = "1" 9 | 10 | args = get_args() 11 | 12 | np.random.seed(args.seed) 13 | torch.manual_seed(args.seed) 14 | 15 | if args.cuda: 16 | torch.cuda.manual_seed(args.seed) 17 | 18 | 19 | def main(): 20 | num_episodes = int(args.num_eval_episodes) 21 | args.device = torch.device("cuda:0" if args.cuda else "cpu") 22 | 23 | torch.set_num_threads(1) 24 | envs = make_vec_envs(args) 25 | obs, infos = envs.reset() 26 | 27 | for ep_num in range(num_episodes): 28 | for step in range(args.max_episode_length): 29 | action = torch.randint(0, 3, (args.num_processes,)) 30 | obs, rew, done, infos = envs.step(action) 31 | 32 | if done: 33 | break 34 | 35 | print("Test successfully completed") 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /utils/distributions.py: -------------------------------------------------------------------------------- 1 | # The following code is largely borrowed from: 2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/distributions.py 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from utils.model import AddBias 8 | 9 | """ 10 | Modify standard PyTorch distributions so they are compatible with this code. 
11 | """ 12 | 13 | FixedCategorical = torch.distributions.Categorical 14 | 15 | old_sample = FixedCategorical.sample 16 | FixedCategorical.sample = lambda self: old_sample(self) 17 | 18 | log_prob_cat = FixedCategorical.log_prob 19 | FixedCategorical.log_probs = lambda self, actions: \ 20 | log_prob_cat(self, actions.squeeze(-1)) 21 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True) 22 | 23 | FixedNormal = torch.distributions.Normal 24 | log_prob_normal = FixedNormal.log_prob 25 | FixedNormal.log_probs = lambda self, actions: \ 26 | log_prob_normal(self, actions).sum(-1, keepdim=False) 27 | 28 | entropy = FixedNormal.entropy 29 | FixedNormal.entropy = lambda self: entropy(self).sum(-1) 30 | 31 | FixedNormal.mode = lambda self: self.mean 32 | 33 | 34 | class Categorical(nn.Module): 35 | 36 | def __init__(self, num_inputs, num_outputs): 37 | super(Categorical, self).__init__() 38 | self.linear = nn.Linear(num_inputs, num_outputs) 39 | 40 | def forward(self, x): 41 | x = self.linear(x) 42 | return FixedCategorical(logits=x) 43 | 44 | 45 | class DiagGaussian(nn.Module): 46 | 47 | def __init__(self, num_inputs, num_outputs): 48 | super(DiagGaussian, self).__init__() 49 | 50 | self.fc_mean = nn.Linear(num_inputs, num_outputs) 51 | self.logstd = AddBias(torch.zeros(num_outputs)) 52 | 53 | def forward(self, x): 54 | action_mean = self.fc_mean(x) 55 | 56 | zeros = torch.zeros(action_mean.size()) 57 | if x.is_cuda: 58 | zeros = zeros.cuda() 59 | 60 | action_logstd = self.logstd(zeros) 61 | return FixedNormal(action_mean, action_logstd.exp()) 62 | -------------------------------------------------------------------------------- /utils/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | 7 | def get_grid(pose, grid_size, device): 8 | """ 9 | Input: 10 | `pose` FloatTensor(bs, 3) 11 | `grid_size` 4-tuple (bs, _, grid_h, grid_w) 12 | `device` torch.device (cpu or gpu) 13 | Output: 14 | `rot_grid` FloatTensor(bs, grid_h, grid_w, 2) 15 | `trans_grid` FloatTensor(bs, grid_h, grid_w, 2) 16 | 17 | """ 18 | pose = pose.float() 19 | x = pose[:, 0] 20 | y = pose[:, 1] 21 | t = pose[:, 2] 22 | 23 | bs = x.size(0) 24 | t = t * np.pi / 180. 
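    # get_grid builds two flow fields for F.grid_sample: theta1 below is the
    # pure rotation [[cos t, -sin t, 0], [sin t, cos t, 0]] and theta2 the
    # pure translation [[1, 0, x], [0, 1, y]]. Semantic_Mapping in model.py
    # applies them in sequence to register the egocentric map into the
    # geocentric one:
    #
    #     rot_grid, trans_grid = get_grid(st_pose, agent_view.size(), device)
    #     rotated = F.grid_sample(agent_view, rot_grid, align_corners=True)
    #     translated = F.grid_sample(rotated, trans_grid, align_corners=True)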
25 | cos_t = t.cos() 26 | sin_t = t.sin() 27 | 28 | theta11 = torch.stack([cos_t, -sin_t, 29 | torch.zeros(cos_t.shape).float().to(device)], 1) 30 | theta12 = torch.stack([sin_t, cos_t, 31 | torch.zeros(cos_t.shape).float().to(device)], 1) 32 | theta1 = torch.stack([theta11, theta12], 1) 33 | 34 | theta21 = torch.stack([torch.ones(x.shape).to(device), 35 | -torch.zeros(x.shape).to(device), x], 1) 36 | theta22 = torch.stack([torch.zeros(x.shape).to(device), 37 | torch.ones(x.shape).to(device), y], 1) 38 | theta2 = torch.stack([theta21, theta22], 1) 39 | 40 | rot_grid = F.affine_grid(theta1, torch.Size(grid_size)) 41 | trans_grid = F.affine_grid(theta2, torch.Size(grid_size)) 42 | 43 | return rot_grid, trans_grid 44 | 45 | 46 | class ChannelPool(nn.MaxPool1d): 47 | def forward(self, x): 48 | n, c, w, h = x.size() 49 | x = x.view(n, c, w * h).permute(0, 2, 1) 50 | x = x.contiguous() 51 | pooled = F.max_pool1d(x, c, 1) 52 | _, _, c = pooled.size() 53 | pooled = pooled.permute(0, 2, 1) 54 | return pooled.view(n, c, w, h) 55 | 56 | 57 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/utils.py#L32 58 | class AddBias(nn.Module): 59 | def __init__(self, bias): 60 | super(AddBias, self).__init__() 61 | self._bias = nn.Parameter(bias.unsqueeze(1)) 62 | 63 | def forward(self, x): 64 | if x.dim() == 2: 65 | bias = self._bias.t().view(1, -1) 66 | else: 67 | bias = self._bias.t().view(1, -1, 1, 1) 68 | 69 | return x + bias 70 | 71 | 72 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L10 73 | class Flatten(nn.Module): 74 | def forward(self, x): 75 | return x.view(x.size(0), -1) 76 | 77 | 78 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L82 79 | class NNBase(nn.Module): 80 | 81 | def __init__(self, recurrent, recurrent_input_size, hidden_size): 82 | 83 | super(NNBase, self).__init__() 84 | self._hidden_size = hidden_size 85 | self._recurrent = recurrent 86 | 87 | if recurrent: 88 | self.gru = nn.GRUCell(recurrent_input_size, hidden_size) 89 | nn.init.orthogonal_(self.gru.weight_ih.data) 90 | nn.init.orthogonal_(self.gru.weight_hh.data) 91 | self.gru.bias_ih.data.fill_(0) 92 | self.gru.bias_hh.data.fill_(0) 93 | 94 | @property 95 | def is_recurrent(self): 96 | return self._recurrent 97 | 98 | @property 99 | def rec_state_size(self): 100 | if self._recurrent: 101 | return self._hidden_size 102 | return 1 103 | 104 | @property 105 | def output_size(self): 106 | return self._hidden_size 107 | 108 | def _forward_gru(self, x, hxs, masks): 109 | if x.size(0) == hxs.size(0): 110 | x = hxs = self.gru(x, hxs * masks[:, None]) 111 | else: 112 | # x is a (T, N, -1) tensor that has been flatten to (T * N, -1) 113 | N = hxs.size(0) 114 | T = int(x.size(0) / N) 115 | 116 | # unflatten 117 | x = x.view(T, N, x.size(1)) 118 | 119 | # Same deal with masks 120 | masks = masks.view(T, N, 1) 121 | 122 | outputs = [] 123 | for i in range(T): 124 | hx = hxs = self.gru(x[i], hxs * masks[i]) 125 | outputs.append(hx) 126 | 127 | # x is a (T, N, -1) tensor 128 | x = torch.stack(outputs, dim=0) 129 | # flatten 130 | x = x.view(T * N, -1) 131 | 132 | return x, hxs 133 | -------------------------------------------------------------------------------- /utils/optimization.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import re 3 | 4 | from torch import optim 5 | 6 | 7 | def get_optimizer(parameters, s): 8 | """ 9 | Parse optimizer parameters. 
10 |     Input should be of the form:
11 |     - "sgd,lr=0.01"
12 |     - "adagrad,lr=0.1,lr_decay=0.05"
13 |     """
14 |     if "," in s:
15 |         method = s[:s.find(',')]
16 |         optim_params = {}
17 |         for x in s[s.find(',') + 1:].split(','):
18 |             split = x.split('=')
19 |             assert len(split) == 2
20 |             assert re.match(
21 |                 r"^[+-]?(\d+(\.\d*)?|\.\d+)$",
22 |                 split[1]) is not None
23 |             optim_params[split[0]] = float(split[1])
24 |     else:
25 |         method = s
26 |         optim_params = {}
27 | 
28 |     if method == 'adadelta':
29 |         optim_fn = optim.Adadelta
30 |     elif method == 'adagrad':
31 |         optim_fn = optim.Adagrad
32 |     elif method == 'adam':
33 |         optim_fn = optim.Adam
34 |         optim_params['betas'] = (optim_params.get('beta1', 0.5),
35 |                                  optim_params.get('beta2', 0.999))
36 |         optim_params.pop('beta1', None)
37 |         optim_params.pop('beta2', None)
38 |     elif method == 'adamax':
39 |         optim_fn = optim.Adamax
40 |     elif method == 'asgd':
41 |         optim_fn = optim.ASGD
42 |     elif method == 'rmsprop':
43 |         optim_fn = optim.RMSprop
44 |     elif method == 'rprop':
45 |         optim_fn = optim.Rprop
46 |     elif method == 'sgd':
47 |         optim_fn = optim.SGD
48 |         assert 'lr' in optim_params
49 |     else:
50 |         raise Exception('Unknown optimization method: "%s"' % method)
51 | 
52 |     # check that we give good parameters to the optimizer
53 |     # (getfullargspec: inspect.getargspec was removed in Python 3.11)
54 |     expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
55 |     assert expected_args[:2] == ['self', 'params']
56 |     if not all(k in expected_args[2:] for k in optim_params.keys()):
57 |         raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
58 |             str(expected_args[2:]), str(optim_params.keys())))
59 | 
60 |     return optim_fn(parameters, **optim_params)
--------------------------------------------------------------------------------
/utils/storage.py:
--------------------------------------------------------------------------------
1 | # The following code is largely borrowed from:
2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/storage.py
3 | 
4 | from collections import namedtuple
5 | 
6 | import numpy as np
7 | import torch
8 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
9 | 
10 | 
11 | def _flatten_helper(T, N, _tensor):
12 |     return _tensor.view(T * N, *_tensor.size()[2:])
13 | 
14 | 
15 | class RolloutStorage(object):
16 | 
17 |     def __init__(self, num_steps, num_processes, obs_shape, action_space,
18 |                  rec_state_size):
19 | 
20 |         if action_space.__class__.__name__ == 'Discrete':
21 |             self.n_actions = 1
22 |             action_type = torch.long
23 |         else:
24 |             self.n_actions = action_space.shape[0]
25 |             action_type = torch.float32
26 | 
27 |         self.obs = torch.zeros(num_steps + 1, num_processes, *obs_shape)
28 |         self.rec_states = torch.zeros(num_steps + 1, num_processes,
29 |                                       rec_state_size)
30 |         self.rewards = torch.zeros(num_steps, num_processes)
31 |         self.value_preds = torch.zeros(num_steps + 1, num_processes)
32 |         self.returns = torch.zeros(num_steps + 1, num_processes)
33 |         self.action_log_probs = torch.zeros(num_steps, num_processes)
34 |         self.actions = torch.zeros((num_steps, num_processes, self.n_actions),
35 |                                    dtype=action_type)
36 |         self.masks = torch.ones(num_steps + 1, num_processes)
37 | 
38 |         self.num_steps = num_steps
39 |         self.step = 0
40 |         self.has_extras = False
41 |         self.extras_size = None
42 | 
43 |     def to(self, device):
44 |         self.obs = self.obs.to(device)
45 |         self.rec_states = self.rec_states.to(device)
46 |         self.rewards = self.rewards.to(device)
47 |         self.value_preds = self.value_preds.to(device)
48 |         self.returns = self.returns.to(device)
49 |         self.action_log_probs = self.action_log_probs.to(device)
50 |         self.actions = self.actions.to(device)
51 |         self.masks = self.masks.to(device)
52 |         if self.has_extras:
53 |             self.extras = self.extras.to(device)
54 |         return self
55 | 
56 |     def insert(self, obs, rec_states, actions, action_log_probs, value_preds,
57 |                rewards, masks):
58 |         self.obs[self.step + 1].copy_(obs)
59 |         self.rec_states[self.step + 1].copy_(rec_states)
60 |         self.actions[self.step].copy_(actions.view(-1, self.n_actions))
61 |         self.action_log_probs[self.step].copy_(action_log_probs)
62 |         self.value_preds[self.step].copy_(value_preds)
63 |         self.rewards[self.step].copy_(rewards)
64 |         self.masks[self.step + 1].copy_(masks)
65 | 
66 |         self.step = (self.step + 1) % self.num_steps
67 | 
68 |     def after_update(self):
69 |         self.obs[0].copy_(self.obs[-1])
70 |         self.rec_states[0].copy_(self.rec_states[-1])
71 |         self.masks[0].copy_(self.masks[-1])
72 |         if self.has_extras:
73 |             self.extras[0].copy_(self.extras[-1])
74 | 
75 |     def compute_returns(self, next_value, use_gae, gamma, tau):
76 |         if use_gae:
77 |             self.value_preds[-1] = next_value
78 |             gae = 0
79 |             for step in reversed(range(self.rewards.size(0))):
80 |                 delta = self.rewards[step] + gamma \
81 |                     * self.value_preds[step + 1] * self.masks[step + 1] \
82 |                     - self.value_preds[step]
83 |                 gae = delta + gamma * tau * self.masks[step + 1] * gae
84 |                 self.returns[step] = gae + self.value_preds[step]
85 |         else:
86 |             self.returns[-1] = next_value
87 |             for step in reversed(range(self.rewards.size(0))):
88 |                 self.returns[step] = self.returns[step + 1] * gamma \
89 |                     * self.masks[step + 1] + self.rewards[step]
90 | 
91 |     def feed_forward_generator(self, advantages, num_mini_batch):
92 | 
93 |         num_steps, num_processes = self.rewards.size()[0:2]
94 |         batch_size = num_processes * num_steps
95 |         mini_batch_size = batch_size // num_mini_batch
96 |         assert batch_size >= num_mini_batch, (
97 |             "PPO requires the number of processes ({}) "
98 |             "* number of steps ({}) = {} "
99 |             "to be greater than or equal to "
100 |             "the number of PPO mini batches ({})."
101 | "".format(num_processes, num_steps, num_processes * num_steps, 102 | num_mini_batch)) 103 | 104 | sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), 105 | mini_batch_size, drop_last=False) 106 | 107 | for indices in sampler: 108 | yield { 109 | 'obs': self.obs[:-1].view(-1, *self.obs.size()[2:])[indices], 110 | 'rec_states': self.rec_states[:-1].view( 111 | -1, self.rec_states.size(-1))[indices], 112 | 'actions': self.actions.view(-1, self.n_actions)[indices], 113 | 'value_preds': self.value_preds[:-1].view(-1)[indices], 114 | 'returns': self.returns[:-1].view(-1)[indices], 115 | 'masks': self.masks[:-1].view(-1)[indices], 116 | 'old_action_log_probs': self.action_log_probs.view(-1)[indices], 117 | 'adv_targ': advantages.view(-1)[indices], 118 | 'extras': self.extras[:-1].view( 119 | -1, self.extras_size)[indices] 120 | if self.has_extras else None, 121 | } 122 | 123 | def recurrent_generator(self, advantages, num_mini_batch): 124 | 125 | num_processes = self.rewards.size(1) 126 | assert num_processes >= num_mini_batch, ( 127 | "PPO requires the number of processes ({}) " 128 | "to be greater than or equal to the number of " 129 | "PPO mini batches ({}).".format(num_processes, num_mini_batch)) 130 | num_envs_per_batch = num_processes // num_mini_batch 131 | perm = torch.randperm(num_processes) 132 | T, N = self.num_steps, num_envs_per_batch 133 | 134 | for start_ind in range(0, num_processes, num_envs_per_batch): 135 | 136 | obs = [] 137 | rec_states = [] 138 | actions = [] 139 | value_preds = [] 140 | returns = [] 141 | masks = [] 142 | old_action_log_probs = [] 143 | adv_targ = [] 144 | if self.has_extras: 145 | extras = [] 146 | 147 | for offset in range(num_envs_per_batch): 148 | 149 | ind = perm[start_ind + offset] 150 | obs.append(self.obs[:-1, ind]) 151 | rec_states.append(self.rec_states[0:1, ind]) 152 | actions.append(self.actions[:, ind]) 153 | value_preds.append(self.value_preds[:-1, ind]) 154 | returns.append(self.returns[:-1, ind]) 155 | masks.append(self.masks[:-1, ind]) 156 | old_action_log_probs.append(self.action_log_probs[:, ind]) 157 | adv_targ.append(advantages[:, ind]) 158 | if self.has_extras: 159 | extras.append(self.extras[:-1, ind]) 160 | 161 | # These are all tensors of size (T, N, ...) 
162 | obs = torch.stack(obs, 1) 163 | actions = torch.stack(actions, 1) 164 | value_preds = torch.stack(value_preds, 1) 165 | returns = torch.stack(returns, 1) 166 | masks = torch.stack(masks, 1) 167 | old_action_log_probs = torch.stack(old_action_log_probs, 1) 168 | adv_targ = torch.stack(adv_targ, 1) 169 | if self.has_extras: 170 | extras = torch.stack(extras, 1) 171 | 172 | yield { 173 | 'obs': _flatten_helper(T, N, obs), 174 | 'actions': _flatten_helper(T, N, actions), 175 | 'value_preds': _flatten_helper(T, N, value_preds), 176 | 'returns': _flatten_helper(T, N, returns), 177 | 'masks': _flatten_helper(T, N, masks), 178 | 'old_action_log_probs': _flatten_helper( 179 | T, N, old_action_log_probs), 180 | 'adv_targ': _flatten_helper(T, N, adv_targ), 181 | 'extras': _flatten_helper( 182 | T, N, extras) if self.has_extras else None, 183 | 'rec_states': torch.stack(rec_states, 1).view(N, -1), 184 | } 185 | 186 | 187 | class GlobalRolloutStorage(RolloutStorage): 188 | 189 | def __init__(self, num_steps, num_processes, obs_shape, action_space, 190 | rec_state_size, extras_size): 191 | super(GlobalRolloutStorage, self).__init__( 192 | num_steps, num_processes, obs_shape, action_space, rec_state_size) 193 | self.extras = torch.zeros((num_steps + 1, num_processes, extras_size), 194 | dtype=torch.long) 195 | self.has_extras = True 196 | self.extras_size = extras_size 197 | 198 | def insert(self, obs, rec_states, actions, action_log_probs, value_preds, 199 | rewards, masks, extras): 200 | self.extras[self.step + 1].copy_(extras) 201 | super(GlobalRolloutStorage, self).insert( 202 | obs, rec_states, actions, 203 | action_log_probs, value_preds, rewards, masks) 204 | --------------------------------------------------------------------------------
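For illustration only — a minimal sketch of how the storage classes above are
typically driven (placeholder shapes, a stand-in `Discrete` action space, and
dummy policy/environment outputs; this is not code from the repo, and it
assumes the repo root is on PYTHONPATH):

import torch

from utils.storage import GlobalRolloutStorage


class Discrete:  # minimal stand-in; RolloutStorage only inspects the class name
    pass


num_steps, num_processes = 8, 2
obs_shape, rec_size, extras_size = (4, 16, 16), 32, 1

rollouts = GlobalRolloutStorage(num_steps, num_processes, obs_shape,
                                Discrete(), rec_size, extras_size)
rollouts.to(torch.device("cpu"))

for _ in range(num_steps):
    # A real loop would query the actor-critic and the vectorized envs here.
    obs = torch.zeros(num_processes, *obs_shape)
    rec = torch.zeros(num_processes, rec_size)
    actions = torch.zeros(num_processes, 1, dtype=torch.long)
    log_probs = torch.zeros(num_processes)
    values = torch.zeros(num_processes)
    rewards = torch.zeros(num_processes)
    masks = torch.ones(num_processes)  # 0.0 where an episode just ended
    extras = torch.zeros(num_processes, extras_size, dtype=torch.long)
    rollouts.insert(obs, rec, actions, log_probs, values, rewards, masks,
                    extras)

# GAE, as implemented in compute_returns:
#   delta_t = r_t + gamma * V_{t+1} * mask_{t+1} - V_t
#   A_t     = delta_t + gamma * tau * mask_{t+1} * A_{t+1}
next_value = torch.zeros(num_processes)
rollouts.compute_returns(next_value, use_gae=True, gamma=0.99, tau=0.95)

advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1]
for batch in rollouts.feed_forward_generator(advantages, num_mini_batch=2):
    pass  # batch['obs'], batch['adv_targ'], ... feed one PPO update step

rollouts.after_update()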