├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── agents
│   ├── sem_exp.py
│   └── utils
│       ├── semantic_prediction.py
│       └── visualization.py
├── algo
│   ├── __init__.py
│   └── ppo.py
├── arguments.py
├── configs
│   ├── Base-RCNN-FPN.yaml
│   └── COCO-InstanceSegmentation
│       └── mask_rcnn_R_50_FPN_3x.yaml
├── constants.py
├── docs
│   ├── DOCKER_INSTRUCTIONS.md
│   ├── INSTRUCTIONS.md
│   ├── example.gif
│   ├── legend.png
│   └── overview.jpg
├── envs
│   ├── __init__.py
│   ├── habitat
│   │   ├── __init__.py
│   │   ├── configs
│   │   │   └── tasks
│   │   │       └── objectnav_gibson.yaml
│   │   ├── objectgoal_env.py
│   │   └── utils
│   │       └── vector_env.py
│   └── utils
│       ├── depth_utils.py
│       ├── fmm_planner.py
│       ├── map_builder.py
│       ├── pose.py
│       └── rotation_utils.py
├── main.py
├── model.py
├── requirements.txt
├── test.py
└── utils
    ├── distributions.py
    ├── model.py
    ├── optimization.py
    └── storage.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # vim
132 | *.swo
133 | *.swp
134 |
135 | .idea/
136 | .vscode/
137 | *.DS_Store
138 |
139 | # Data and log folders
140 | saved/
141 | tmp/
142 | pretrained_models/
143 | data/
144 | data
145 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Base image
2 | FROM nvidia/cudagl:10.1-devel-ubuntu16.04
3 |
4 | # Setup basic packages
5 | RUN apt-get update && apt-get install -y --no-install-recommends \
6 | build-essential \
7 | git \
8 | curl \
9 | vim \
10 | ca-certificates \
11 | libjpeg-dev \
12 | libpng-dev \
13 | libglfw3-dev \
14 | libglm-dev \
15 | libx11-dev \
16 | libomp-dev \
17 | libegl1-mesa-dev \
18 | pkg-config \
19 | wget \
20 | zip \
21 | htop \
22 | tmux \
23 | unzip &&\
24 | rm -rf /var/lib/apt/lists/*
25 |
26 | # Install conda
27 | RUN wget -O $HOME/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh &&\
28 | chmod +x ~/miniconda.sh &&\
29 | ~/miniconda.sh -b -p /custom/conda &&\
30 | rm ~/miniconda.sh &&\
31 | /custom/conda/bin/conda install numpy pyyaml scipy ipython mkl mkl-include &&\
32 | /custom/conda/bin/conda clean -ya
33 | ENV PATH /custom/conda/bin:$PATH
34 |
35 | # Install cmake
36 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.14.0/cmake-3.14.0-Linux-x86_64.sh
37 | RUN mkdir /opt/cmake
38 | RUN sh /cmake-3.14.0-Linux-x86_64.sh --prefix=/opt/cmake --skip-license
39 | RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
40 | RUN cmake --version
41 |
42 | # Setup habitat-sim
43 | RUN git clone https://github.com/facebookresearch/habitat-sim.git
44 | RUN /bin/bash -c "cd habitat-sim; git checkout tags/v0.1.5; pip install -r requirements.txt; python setup.py install --headless --with-cuda"
45 |
46 | # Install challenge specific habitat-api
47 | RUN git clone https://github.com/facebookresearch/habitat-api.git
48 | RUN /bin/bash -c "cd habitat-api; git checkout tags/v0.1.5; pip install -e ."
49 | RUN /bin/bash -c "cd habitat-api; wget http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip; unzip habitat-test-scenes.zip"
50 |
51 | # Silence habitat-sim logs
52 | ENV GLOG_minloglevel=2
53 | ENV MAGNUM_LOG="quiet"
54 |
55 | # Install project specific packages
56 | RUN /bin/bash -c "apt-get update; apt-get install -y libsm6 libxext6 libxrender-dev; pip install opencv-python"
57 | RUN /bin/bash -c "pip install --upgrade cython numpy"
58 | RUN /bin/bash -c "pip install matplotlib seaborn==0.9.0 scikit-fmm==2019.1.30 scikit-image==0.15.0 imageio==2.6.0 scikit-learn==0.22.2.post1 ifcfg"
59 |
60 | # Install pytorch and torch_scatter
61 | RUN conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.2 -c pytorch
62 | RUN /bin/bash -c "pip install torch_scatter"
63 |
64 | # Install detectron2
65 | RUN /bin/bash -c "python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html"
66 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Devendra Chaplot
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Object Goal Navigation using Goal-Oriented Semantic Exploration
2 | This is a PyTorch implementation of the NeurIPS-20 paper:
3 |
4 | [Object Goal Navigation using Goal-Oriented Semantic Exploration](https://arxiv.org/pdf/2007.00643.pdf)
5 | Devendra Singh Chaplot, Dhiraj Gandhi, Abhinav Gupta, Ruslan Salakhutdinov
6 | Carnegie Mellon University, Facebook AI Research
7 |
8 | Winner of the [CVPR 2020 Habitat ObjectNav Challenge](https://aihabitat.org/challenge/2020/).
9 |
10 | Project Website: https://devendrachaplot.github.io/projects/semantic-exploration
11 |
12 | 
13 |
14 | ### Overview:
15 | The Goal-Oriented Semantic Exploration (SemExp) model consists of three modules: a Semantic Mapping Module, a Goal-Oriented Semantic Policy, and a deterministic Local Policy.
16 | As shown below, the Semantic Mapping Module builds a semantic map over time. The Goal-Oriented Semantic Policy selects a long-term goal based on the semantic
17 | map to reach the given object goal efficiently. The deterministic Local Policy, based on analytical planners, takes low-level navigation actions to reach the long-term goal.
18 |
19 | 
20 |
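To make the three-module control flow concrete, the sketch below shows one illustrative episode loop. The helper names (`update_map`, `select_goal`, `plan_step`) are hypothetical stand-ins, not this repository's API; the real logic lives in `main.py` and `agents/sem_exp.py`.

```python
# Illustrative SemExp decision loop (hypothetical helpers; defaults taken
# from arguments.py). The map is updated every step, a new long-term goal
# is selected every num_local_steps steps, and an analytical planner acts
# in between.
def semexp_episode(env, update_map, select_goal, plan_step,
                   max_episode_length=500, num_local_steps=25):
    obs, goal_cat = env.reset()
    sem_map, long_term_goal = None, None
    for step in range(max_episode_length):
        sem_map = update_map(sem_map, obs)                   # Semantic Mapping Module
        if step % num_local_steps == 0:
            long_term_goal = select_goal(sem_map, goal_cat)  # Goal-Oriented Semantic Policy
        action = plan_step(sem_map, long_term_goal)          # deterministic Local Policy
        obs, done = env.step(action)
        if done:
            break
```
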
21 | ### This repository contains:
22 | - Episode train and test datasets for [Object Goal Navigation](https://arxiv.org/pdf/2007.00643.pdf) task for the Gibson dataset in the Habitat Simulator.
23 | - The code to train and evaluate the Semantic Exploration (SemExp) model on the Object Goal Navigation task.
24 | - Pretrained SemExp model.
25 |
26 | ## Installing Dependencies
27 | - We use earlier versions of [habitat-sim](https://github.com/facebookresearch/habitat-sim) and [habitat-lab](https://github.com/facebookresearch/habitat-lab) as specified below:
28 |
29 | Installing habitat-sim:
30 | ```
31 | git clone https://github.com/facebookresearch/habitat-sim.git
32 | cd habitat-sim; git checkout tags/v0.1.5;
33 | pip install -r requirements.txt;
34 | python setup.py install --headless
35 | python setup.py install # (for Mac OS)
36 | ```
37 |
38 | Installing habitat-lab:
39 | ```
40 | git clone https://github.com/facebookresearch/habitat-lab.git
41 | cd habitat-lab; git checkout tags/v0.1.5;
42 | pip install -e .
43 | ```
44 | Check habitat installation by running `python examples/benchmark.py` in the habitat-lab folder.
45 |
46 | - Install [pytorch](https://pytorch.org/) according to your system configuration. The code is tested on pytorch v1.6.0 and cudatoolkit v10.2. If you are using conda:
47 | ```
48 | conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch #(Linux with GPU)
49 | conda install pytorch==1.6.0 torchvision==0.7.0 -c pytorch #(Mac OS)
50 | ```
51 |
52 | - Install [detectron2](https://github.com/facebookresearch/detectron2/) according to your system configuration. If you are using conda:
53 | ```
54 | python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html #(Linux with GPU)
55 | CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' #(Mac OS)
56 | ```
57 |
58 | ### Docker and Singularity images:
59 | We provide experimental [docker](https://www.docker.com/) and [singularity](https://sylabs.io/) images with all the dependencies installed, see [Docker Instructions](./docs/DOCKER_INSTRUCTIONS.md).
60 |
61 |
62 | ## Setup
63 | Clone the repository and install other requirements:
64 | ```
65 | git clone https://github.com/devendrachaplot/Object-Goal-Navigation/
66 | cd Object-Goal-Navigation/;
67 | pip install -r requirements.txt
68 | ```
69 |
70 | ### Downloading scene dataset
71 | - Download the Gibson dataset using the instructions here: https://github.com/facebookresearch/habitat-lab#scenes-datasets (download the 11GB file `gibson_habitat_trainval.zip`)
72 | - Move the Gibson scene dataset or create a symlink at `data/scene_datasets/gibson_semantic`.
73 |
74 | ### Downloading episode dataset
75 | - Download the episode dataset:
76 | ```
77 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1tslnZAkH8m3V5nP8pbtBmaR2XEfr8Rau' -O objectnav_gibson_v1.1.zip
78 | ```
79 | - Unzip the dataset into `data/datasets/objectnav/gibson/v1.1/`
80 |
81 | ### Setting up datasets
82 | The code requires the datasets in a `data` folder in the following format (same as habitat-lab):
83 | ```
84 | Object-Goal-Navigation/
85 | data/
86 | scene_datasets/
87 | gibson_semantic/
88 | Adrian.glb
89 | Adrian.navmesh
90 | ...
91 | datasets/
92 | objectnav/
93 | gibson/
94 | v1.1/
95 | train/
96 | val/
97 | ```
98 | ```
99 |
100 | ### Test setup
101 | To verify that the data is setup correctly, run:
102 | ```
103 | python test.py --agent random -n1 --num_eval_episodes 1 --auto_gpu_config 0
104 | ```
105 |
106 | ## Usage
107 |
108 | ### Training:
109 | For training the SemExp model on the Object Goal Navigation task:
110 | ```
111 | python main.py
112 | ```
113 |
114 | ### Downloading pre-trained models
115 | ```
116 | mkdir pretrained_models;
117 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=171ZA7XNu5vi3XLpuKs8DuGGZrYyuSjL0' -O pretrained_models/sem_exp.pth
118 | ```
119 |
120 | ### Evaluation:
121 | For evaluating the pre-trained model:
122 | ```
123 | python main.py --split val --eval 1 --load pretrained_models/sem_exp.pth
124 | ```
125 |
126 | For visualizing the agent observations and predicted semantic map, add `-v 1` as an argument to the above command.
127 |
128 | The pre-trained model should get 0.657 Success, 0.339 SPL and 1.474 DTG.
129 |
130 | For more detailed instructions, see [INSTRUCTIONS](./docs/INSTRUCTIONS.md).
131 |
132 |
133 | ## Cite as
134 | >Chaplot, D.S., Gandhi, D., Gupta, A. and Salakhutdinov, R., 2020. Object Goal Navigation using Goal-Oriented Semantic Exploration. In Neural Information Processing Systems (NeurIPS-20). ([PDF](https://arxiv.org/pdf/2007.00643.pdf))
135 |
136 | ### Bibtex:
137 | ```
138 | @inproceedings{chaplot2020object,
139 | title={Object Goal Navigation using Goal-Oriented Semantic Exploration},
140 | author={Chaplot, Devendra Singh and Gandhi, Dhiraj and
141 | Gupta, Abhinav and Salakhutdinov, Ruslan},
142 | booktitle={Neural Information Processing Systems (NeurIPS)},
143 | year={2020}
144 | }
145 | ```
146 |
147 | ## Related Projects
148 | - This project builds on the [Active Neural SLAM](https://devendrachaplot.github.io/projects/Neural-SLAM) paper. The code and pretrained models for the Active Neural SLAM system are available at:
149 | https://github.com/devendrachaplot/Neural-SLAM.
150 | - The Semantic Mapping module is similar to the one used in [Semantic Curiosity](https://devendrachaplot.github.io/projects/SemanticCuriosity).
151 |
152 | ## Acknowledgements
153 | This repository uses the [Habitat Lab](https://github.com/facebookresearch/habitat-lab) implementation for running the RL environment.
154 | The implementation of PPO is borrowed from [ikostrikov/pytorch-a2c-ppo-acktr-gail](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/).
155 | The Mask-RCNN implementation is based on the [detectron2](https://github.com/facebookresearch/detectron2/) repository. We would also like to thank Shubham Tulsiani and Saurabh Gupta for their help in implementing some parts of the code.
156 |
--------------------------------------------------------------------------------
/agents/sem_exp.py:
--------------------------------------------------------------------------------
1 | import math
2 | import os
3 | import cv2
4 | import numpy as np
5 | import skimage.morphology
6 | from PIL import Image
7 | from torchvision import transforms
8 |
9 | from envs.utils.fmm_planner import FMMPlanner
10 | from envs.habitat.objectgoal_env import ObjectGoal_Env
11 | from agents.utils.semantic_prediction import SemanticPredMaskRCNN
12 | from constants import color_palette
13 | import envs.utils.pose as pu
14 | import agents.utils.visualization as vu
15 |
16 |
17 | class Sem_Exp_Env_Agent(ObjectGoal_Env):
18 |     """The Sem_Exp environment agent class. A separate Sem_Exp_Env_Agent class
19 | object is used for each environment thread.
20 |
21 | """
22 |
23 | def __init__(self, args, rank, config_env, dataset):
24 |
25 | self.args = args
26 | super().__init__(args, rank, config_env, dataset)
27 |
28 | # initialize transform for RGB observations
29 | self.res = transforms.Compose(
30 | [transforms.ToPILImage(),
31 | transforms.Resize((args.frame_height, args.frame_width),
32 | interpolation=Image.NEAREST)])
33 |
34 | # initialize semantic segmentation prediction model
35 | if args.sem_gpu_id == -1:
36 | args.sem_gpu_id = config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID
37 |
38 | self.sem_pred = SemanticPredMaskRCNN(args)
39 |
40 | # initializations for planning:
41 | self.selem = skimage.morphology.disk(3)
42 |
43 | self.obs = None
44 | self.obs_shape = None
45 | self.collision_map = None
46 | self.visited = None
47 | self.visited_vis = None
48 | self.col_width = None
49 | self.curr_loc = None
50 | self.last_loc = None
51 | self.last_action = None
52 | self.count_forward_actions = None
53 |
54 | if args.visualize or args.print_images:
55 | self.legend = cv2.imread('docs/legend.png')
56 | self.vis_image = None
57 | self.rgb_vis = None
58 |
59 | def reset(self):
60 | args = self.args
61 |
62 | obs, info = super().reset()
63 | obs = self._preprocess_obs(obs)
64 |
65 | self.obs_shape = obs.shape
66 |
67 | # Episode initializations
68 | map_shape = (args.map_size_cm // args.map_resolution,
69 | args.map_size_cm // args.map_resolution)
70 | self.collision_map = np.zeros(map_shape)
71 | self.visited = np.zeros(map_shape)
72 | self.visited_vis = np.zeros(map_shape)
73 | self.col_width = 1
74 | self.count_forward_actions = 0
75 | self.curr_loc = [args.map_size_cm / 100.0 / 2.0,
76 | args.map_size_cm / 100.0 / 2.0, 0.]
77 | self.last_action = None
78 |
79 | if args.visualize or args.print_images:
80 | self.vis_image = vu.init_vis_image(self.goal_name, self.legend)
81 |
82 | return obs, info
83 |
84 | def plan_act_and_preprocess(self, planner_inputs):
85 | """Function responsible for planning, taking the action and
86 | preprocessing observations
87 |
88 | Args:
89 | planner_inputs (dict):
90 | dict with following keys:
91 | 'map_pred' (ndarray): (M, M) map prediction
92 | 'goal' (ndarray): (M, M) mat denoting goal locations
93 | 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o)
94 | and planning window (gx1, gx2, gy1, gy2)
95 | 'found_goal' (bool): whether the goal object is found
96 |
97 | Returns:
98 | obs (ndarray): preprocessed observations ((4+C) x H x W)
99 | reward (float): amount of reward returned after previous action
100 | done (bool): whether the episode has ended
101 | info (dict): contains timestep, pose, goal category and
102 | evaluation metric info
103 | """
104 |
105 | # plan
106 | if planner_inputs["wait"]:
107 | self.last_action = None
108 | self.info["sensor_pose"] = [0., 0., 0.]
109 |             return np.zeros(self.obs_shape), 0., False, self.info
110 |
111 | # Reset reward if new long-term goal
112 | if planner_inputs["new_goal"]:
113 | self.info["g_reward"] = 0
114 |
115 | action = self._plan(planner_inputs)
116 |
117 | if self.args.visualize or self.args.print_images:
118 | self._visualize(planner_inputs)
119 |
120 | if action >= 0:
121 |
122 | # act
123 | action = {'action': action}
124 | obs, rew, done, info = super().step(action)
125 |
126 | # preprocess obs
127 | obs = self._preprocess_obs(obs)
128 | self.last_action = action['action']
129 | self.obs = obs
130 | self.info = info
131 |
132 | info['g_reward'] += rew
133 |
134 | return obs, rew, done, info
135 |
136 | else:
137 | self.last_action = None
138 | self.info["sensor_pose"] = [0., 0., 0.]
139 | return np.zeros(self.obs_shape), 0., False, self.info
140 |
141 | def _plan(self, planner_inputs):
142 | """Function responsible for planning
143 |
144 | Args:
145 | planner_inputs (dict):
146 | dict with following keys:
147 | 'map_pred' (ndarray): (M, M) map prediction
148 | 'goal' (ndarray): (M, M) goal locations
149 | 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o)
150 | and planning window (gx1, gx2, gy1, gy2)
151 | 'found_goal' (bool): whether the goal object is found
152 |
153 | Returns:
154 | action (int): action id
155 | """
156 | args = self.args
157 |
158 | self.last_loc = self.curr_loc
159 |
160 | # Get Map prediction
161 | map_pred = np.rint(planner_inputs['map_pred'])
162 | goal = planner_inputs['goal']
163 |
164 | # Get pose prediction and global policy planning window
165 | start_x, start_y, start_o, gx1, gx2, gy1, gy2 = \
166 | planner_inputs['pose_pred']
167 | gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2)
168 | planning_window = [gx1, gx2, gy1, gy2]
169 |
170 | # Get curr loc
171 | self.curr_loc = [start_x, start_y, start_o]
172 | r, c = start_y, start_x
173 | start = [int(r * 100.0 / args.map_resolution - gx1),
174 | int(c * 100.0 / args.map_resolution - gy1)]
175 | start = pu.threshold_poses(start, map_pred.shape)
176 |
177 | self.visited[gx1:gx2, gy1:gy2][start[0] - 0:start[0] + 1,
178 | start[1] - 0:start[1] + 1] = 1
179 |
180 | if args.visualize or args.print_images:
181 | # Get last loc
182 | last_start_x, last_start_y = self.last_loc[0], self.last_loc[1]
183 | r, c = last_start_y, last_start_x
184 | last_start = [int(r * 100.0 / args.map_resolution - gx1),
185 | int(c * 100.0 / args.map_resolution - gy1)]
186 | last_start = pu.threshold_poses(last_start, map_pred.shape)
187 | self.visited_vis[gx1:gx2, gy1:gy2] = \
188 | vu.draw_line(last_start, start,
189 | self.visited_vis[gx1:gx2, gy1:gy2])
190 |
191 | # Collision check
192 | if self.last_action == 1:
193 | x1, y1, t1 = self.last_loc
194 | x2, y2, _ = self.curr_loc
195 | buf = 4
196 | length = 2
197 |
198 | if abs(x1 - x2) < 0.05 and abs(y1 - y2) < 0.05:
199 | self.col_width += 2
200 | if self.col_width == 7:
201 | length = 4
202 | buf = 3
203 | self.col_width = min(self.col_width, 5)
204 | else:
205 | self.col_width = 1
206 |
207 | dist = pu.get_l2_distance(x1, x2, y1, y2)
208 | if dist < args.collision_threshold: # Collision
209 | width = self.col_width
210 | for i in range(length):
211 | for j in range(width):
212 | wx = x1 + 0.05 * \
213 | ((i + buf) * np.cos(np.deg2rad(t1))
214 | + (j - width // 2) * np.sin(np.deg2rad(t1)))
215 | wy = y1 + 0.05 * \
216 | ((i + buf) * np.sin(np.deg2rad(t1))
217 | - (j - width // 2) * np.cos(np.deg2rad(t1)))
218 | r, c = wy, wx
219 | r, c = int(r * 100 / args.map_resolution), \
220 | int(c * 100 / args.map_resolution)
221 | [r, c] = pu.threshold_poses([r, c],
222 | self.collision_map.shape)
223 | self.collision_map[r, c] = 1
224 |
225 | stg, stop = self._get_stg(map_pred, start, np.copy(goal),
226 | planning_window)
227 |
228 | # Deterministic Local Policy
229 | if stop and planner_inputs['found_goal'] == 1:
230 | action = 0 # Stop
231 | else:
232 | (stg_x, stg_y) = stg
233 | angle_st_goal = math.degrees(math.atan2(stg_x - start[0],
234 | stg_y - start[1]))
235 | angle_agent = (start_o) % 360.0
236 | if angle_agent > 180:
237 | angle_agent -= 360
238 |
239 | relative_angle = (angle_agent - angle_st_goal) % 360.0
240 | if relative_angle > 180:
241 | relative_angle -= 360
242 |
243 | if relative_angle > self.args.turn_angle / 2.:
244 | action = 3 # Right
245 | elif relative_angle < -self.args.turn_angle / 2.:
246 | action = 2 # Left
247 | else:
248 | action = 1 # Forward
249 |
250 | return action
251 |
252 | def _get_stg(self, grid, start, goal, planning_window):
253 | """Get short-term goal"""
254 |
255 | [gx1, gx2, gy1, gy2] = planning_window
256 |
257 |         x1, y1 = 0, 0
258 | x2, y2 = grid.shape
259 |
260 | def add_boundary(mat, value=1):
261 | h, w = mat.shape
262 | new_mat = np.zeros((h + 2, w + 2)) + value
263 | new_mat[1:h + 1, 1:w + 1] = mat
264 | return new_mat
265 |
266 | traversible = skimage.morphology.binary_dilation(
267 | grid[x1:x2, y1:y2],
268 | self.selem) != True
269 | traversible[self.collision_map[gx1:gx2, gy1:gy2]
270 | [x1:x2, y1:y2] == 1] = 0
271 | traversible[self.visited[gx1:gx2, gy1:gy2][x1:x2, y1:y2] == 1] = 1
272 |
273 | traversible[int(start[0] - x1) - 1:int(start[0] - x1) + 2,
274 | int(start[1] - y1) - 1:int(start[1] - y1) + 2] = 1
275 |
276 | traversible = add_boundary(traversible)
277 | goal = add_boundary(goal, value=0)
278 |
279 | planner = FMMPlanner(traversible)
280 | selem = skimage.morphology.disk(10)
281 | goal = skimage.morphology.binary_dilation(
282 | goal, selem) != True
283 | goal = 1 - goal * 1.
284 | planner.set_multi_goal(goal)
285 |
286 | state = [start[0] - x1 + 1, start[1] - y1 + 1]
287 | stg_x, stg_y, _, stop = planner.get_short_term_goal(state)
288 |
289 | stg_x, stg_y = stg_x + x1 - 1, stg_y + y1 - 1
290 |
291 | return (stg_x, stg_y), stop
292 |
293 | def _preprocess_obs(self, obs, use_seg=True):
294 | args = self.args
295 | obs = obs.transpose(1, 2, 0)
296 | rgb = obs[:, :, :3]
297 | depth = obs[:, :, 3:4]
298 |
299 | sem_seg_pred = self._get_sem_pred(
300 | rgb.astype(np.uint8), use_seg=use_seg)
301 | depth = self._preprocess_depth(depth, args.min_depth, args.max_depth)
302 |
303 | ds = args.env_frame_width // args.frame_width # Downscaling factor
304 | if ds != 1:
305 | rgb = np.asarray(self.res(rgb.astype(np.uint8)))
306 | depth = depth[ds // 2::ds, ds // 2::ds]
307 | sem_seg_pred = sem_seg_pred[ds // 2::ds, ds // 2::ds]
308 |
309 | depth = np.expand_dims(depth, axis=2)
310 | state = np.concatenate((rgb, depth, sem_seg_pred),
311 | axis=2).transpose(2, 0, 1)
312 |
313 | return state
314 |
315 |     def _preprocess_depth(self, depth, min_d, max_d):
316 |         depth = depth[:, :, 0] * 1  # drop the channel dimension (copy)
317 |
318 |         for i in range(depth.shape[1]):  # fill missing (zero) readings
319 |             depth[:, i][depth[:, i] == 0.] = depth[:, i].max()  # column max
320 |
321 |         mask2 = depth > 0.99  # readings at the far limit are unreliable
322 |         depth[mask2] = 0.
323 |
324 |         mask1 = depth == 0  # push invalid readings far beyond the range
325 |         depth[mask1] = 100.0
326 |         depth = min_d * 100.0 + depth * max_d * 100.0  # rescale to cm
327 |         return depth
328 |
329 | def _get_sem_pred(self, rgb, use_seg=True):
330 | if use_seg:
331 | semantic_pred, self.rgb_vis = self.sem_pred.get_prediction(rgb)
332 | semantic_pred = semantic_pred.astype(np.float32)
333 | else:
334 | semantic_pred = np.zeros((rgb.shape[0], rgb.shape[1], 16))
335 | self.rgb_vis = rgb[:, :, ::-1]
336 | return semantic_pred
337 |
338 | def _visualize(self, inputs):
339 | args = self.args
340 | dump_dir = "{}/dump/{}/".format(args.dump_location,
341 | args.exp_name)
342 | ep_dir = '{}/episodes/thread_{}/eps_{}/'.format(
343 | dump_dir, self.rank, self.episode_no)
344 | if not os.path.exists(ep_dir):
345 | os.makedirs(ep_dir)
346 |
347 | map_pred = inputs['map_pred']
348 | exp_pred = inputs['exp_pred']
349 | start_x, start_y, start_o, gx1, gx2, gy1, gy2 = inputs['pose_pred']
350 |
351 | goal = inputs['goal']
352 | sem_map = inputs['sem_map_pred']
353 |
354 | gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2)
355 |
356 | sem_map += 5
357 |
358 | no_cat_mask = sem_map == 20
359 | map_mask = np.rint(map_pred) == 1
360 | exp_mask = np.rint(exp_pred) == 1
361 | vis_mask = self.visited_vis[gx1:gx2, gy1:gy2] == 1
362 |
363 | sem_map[no_cat_mask] = 0
364 | m1 = np.logical_and(no_cat_mask, exp_mask)
365 | sem_map[m1] = 2
366 |
367 | m2 = np.logical_and(no_cat_mask, map_mask)
368 | sem_map[m2] = 1
369 |
370 | sem_map[vis_mask] = 3
371 |
372 | selem = skimage.morphology.disk(4)
373 | goal_mat = 1 - skimage.morphology.binary_dilation(
374 | goal, selem) != True
375 |
376 | goal_mask = goal_mat == 1
377 | sem_map[goal_mask] = 4
378 |
379 | color_pal = [int(x * 255.) for x in color_palette]
380 | sem_map_vis = Image.new("P", (sem_map.shape[1],
381 | sem_map.shape[0]))
382 | sem_map_vis.putpalette(color_pal)
383 | sem_map_vis.putdata(sem_map.flatten().astype(np.uint8))
384 | sem_map_vis = sem_map_vis.convert("RGB")
385 | sem_map_vis = np.flipud(sem_map_vis)
386 |
387 | sem_map_vis = sem_map_vis[:, :, [2, 1, 0]]
388 | sem_map_vis = cv2.resize(sem_map_vis, (480, 480),
389 | interpolation=cv2.INTER_NEAREST)
390 | self.vis_image[50:530, 15:655] = self.rgb_vis
391 | self.vis_image[50:530, 670:1150] = sem_map_vis
392 |
393 | pos = (
394 | (start_x * 100. / args.map_resolution - gy1)
395 | * 480 / map_pred.shape[0],
396 | (map_pred.shape[1] - start_y * 100. / args.map_resolution + gx1)
397 | * 480 / map_pred.shape[1],
398 | np.deg2rad(-start_o)
399 | )
400 |
401 | agent_arrow = vu.get_contour_points(pos, origin=(670, 50))
402 | color = (int(color_palette[11] * 255),
403 | int(color_palette[10] * 255),
404 | int(color_palette[9] * 255))
405 | cv2.drawContours(self.vis_image, [agent_arrow], 0, color, -1)
406 |
407 | if args.visualize:
408 | # Displaying the image
409 | cv2.imshow("Thread {}".format(self.rank), self.vis_image)
410 | cv2.waitKey(1)
411 |
412 | if args.print_images:
413 | fn = '{}/episodes/thread_{}/eps_{}/{}-{}-Vis-{}.png'.format(
414 | dump_dir, self.rank, self.episode_no,
415 | self.rank, self.episode_no, self.timestep)
416 | cv2.imwrite(fn, self.vis_image)
417 |
--------------------------------------------------------------------------------
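
The deterministic Local Policy at the end of `_plan` above boils down to a heading controller: compute the angle from the agent to the short-term goal, turn if the goal is more than half a turn angle off-heading, otherwise move forward. A self-contained sketch of that rule (the function name is ours; the math mirrors `_plan`, with the default `turn_angle=30` from `arguments.py`):

```python
import math

def heading_to_action(start, start_o, stg, turn_angle=30.0):
    """Angle-to-action rule mirroring the end of Sem_Exp_Env_Agent._plan.

    start: (row, col) agent cell, start_o: heading in degrees,
    stg: (row, col) short-term goal cell.
    Returns 1 (Forward), 2 (Left) or 3 (Right).
    """
    angle_st_goal = math.degrees(math.atan2(stg[0] - start[0],
                                            stg[1] - start[1]))
    angle_agent = start_o % 360.0
    if angle_agent > 180:
        angle_agent -= 360
    relative_angle = (angle_agent - angle_st_goal) % 360.0
    if relative_angle > 180:
        relative_angle -= 360

    if relative_angle > turn_angle / 2.:
        return 3  # Right
    elif relative_angle < -turn_angle / 2.:
        return 2  # Left
    return 1  # Forward

# Goal 10 cells "down" in row-major map coordinates, agent heading 0 degrees:
print(heading_to_action((10, 10), 0.0, (20, 10)))  # 2 (turn left)
```
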
/agents/utils/semantic_prediction.py:
--------------------------------------------------------------------------------
1 | # The following code is largely borrowed from
2 | # https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py and
3 | # https://github.com/facebookresearch/detectron2/blob/master/demo/predictor.py
4 |
5 | import argparse
6 | import time
7 |
8 | import torch
9 | import numpy as np
10 |
11 | from detectron2.config import get_cfg
12 | from detectron2.utils.logger import setup_logger
13 | from detectron2.data.catalog import MetadataCatalog
14 | from detectron2.modeling import build_model
15 | from detectron2.checkpoint import DetectionCheckpointer
16 | from detectron2.utils.visualizer import ColorMode, Visualizer
17 | import detectron2.data.transforms as T
18 |
19 | from constants import coco_categories_mapping
20 |
21 |
22 | class SemanticPredMaskRCNN():
23 |
24 | def __init__(self, args):
25 | self.segmentation_model = ImageSegmentation(args)
26 | self.args = args
27 |
28 | def get_prediction(self, img):
29 | args = self.args
30 | image_list = []
31 | img = img[:, :, ::-1]
32 | image_list.append(img)
33 | seg_predictions, vis_output = self.segmentation_model.get_predictions(
34 | image_list, visualize=args.visualize == 2)
35 |
36 | if args.visualize == 2:
37 | img = vis_output.get_image()
38 |
39 | semantic_input = np.zeros((img.shape[0], img.shape[1], 15 + 1))
40 |
41 | for j, class_idx in enumerate(
42 | seg_predictions[0]['instances'].pred_classes.cpu().numpy()):
43 | if class_idx in list(coco_categories_mapping.keys()):
44 | idx = coco_categories_mapping[class_idx]
45 | obj_mask = seg_predictions[0]['instances'].pred_masks[j] * 1.
46 | semantic_input[:, :, idx] += obj_mask.cpu().numpy()
47 |
48 | return semantic_input, img
49 |
50 |
51 | def compress_sem_map(sem_map):
52 | c_map = np.zeros((sem_map.shape[1], sem_map.shape[2]))
53 | for i in range(sem_map.shape[0]):
54 | c_map[sem_map[i] > 0.] = i + 1
55 | return c_map
56 |
57 |
58 | class ImageSegmentation():
59 | def __init__(self, args):
60 | string_args = """
61 | --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
62 | --input input1.jpeg
63 | --confidence-threshold {}
64 | --opts MODEL.WEIGHTS
65 | detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
66 | """.format(args.sem_pred_prob_thr)
67 |
68 | if args.sem_gpu_id == -2:
69 | string_args += """ MODEL.DEVICE cpu"""
70 | else:
71 | string_args += """ MODEL.DEVICE cuda:{}""".format(args.sem_gpu_id)
72 |
73 | string_args = string_args.split()
74 |
75 | args = get_seg_parser().parse_args(string_args)
76 | logger = setup_logger()
77 | logger.info("Arguments: " + str(args))
78 |
79 | cfg = setup_cfg(args)
80 | self.demo = VisualizationDemo(cfg)
81 |
82 |     def get_predictions(self, image_list, visualize=0):
83 |         return self.demo.run_on_image(image_list, visualize=visualize)
84 |
85 |
86 | def setup_cfg(args):
87 | # load config from file and command-line arguments
88 | cfg = get_cfg()
89 | cfg.merge_from_file(args.config_file)
90 | cfg.merge_from_list(args.opts)
91 | # Set score_threshold for builtin models
92 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
93 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
94 | cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = \
95 | args.confidence_threshold
96 | cfg.freeze()
97 | return cfg
98 |
99 |
100 | def get_seg_parser():
101 | parser = argparse.ArgumentParser(
102 | description="Detectron2 demo for builtin models")
103 | parser.add_argument(
104 | "--config-file",
105 | default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
106 | metavar="FILE",
107 | help="path to config file",
108 | )
109 | parser.add_argument(
110 | "--webcam",
111 | action="store_true",
112 | help="Take inputs from webcam.")
113 | parser.add_argument("--video-input", help="Path to video file.")
114 | parser.add_argument(
115 | "--input",
116 | nargs="+",
117 | help="A list of space separated input images")
118 | parser.add_argument(
119 | "--output",
120 | help="A file or directory to save output visualizations. "
121 | "If not given, will show output in an OpenCV window.",
122 | )
123 |
124 | parser.add_argument(
125 | "--confidence-threshold",
126 | type=float,
127 | default=0.5,
128 | help="Minimum score for instance predictions to be shown",
129 | )
130 | parser.add_argument(
131 | "--opts",
132 | help="Modify config options using the command-line 'KEY VALUE' pairs",
133 | default=[],
134 | nargs=argparse.REMAINDER,
135 | )
136 | return parser
137 |
138 |
139 | class VisualizationDemo(object):
140 | def __init__(self, cfg, instance_mode=ColorMode.IMAGE):
141 | """
142 | Args:
143 | cfg (CfgNode):
144 | instance_mode (ColorMode):
145 | """
146 | self.metadata = MetadataCatalog.get(
147 | cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
148 | )
149 | self.cpu_device = torch.device("cpu")
150 | self.instance_mode = instance_mode
151 |
152 | self.predictor = BatchPredictor(cfg)
153 |
154 | def run_on_image(self, image_list, visualize=0):
155 | """
156 | Args:
157 |             image_list (list of np.ndarray): a list of images of shape
158 |                 (H, W, C) (in BGR order). This is the format used by OpenCV.
159 |
160 |         Returns:
161 |             all_predictions (list of dict): the outputs of the model.
162 | vis_output (VisImage): the visualized image output.
163 | """
164 | vis_output = None
165 | all_predictions = self.predictor(image_list)
166 | # Convert image from OpenCV BGR format to Matplotlib RGB format.
167 |
168 | if visualize:
169 | predictions = all_predictions[0]
170 | image = image_list[0]
171 | visualizer = Visualizer(
172 | image, self.metadata, instance_mode=self.instance_mode)
173 | if "panoptic_seg" in predictions:
174 | panoptic_seg, segments_info = predictions["panoptic_seg"]
175 | vis_output = visualizer.draw_panoptic_seg_predictions(
176 | panoptic_seg.to(self.cpu_device), segments_info
177 | )
178 | else:
179 | if "sem_seg" in predictions:
180 | vis_output = visualizer.draw_sem_seg(
181 | predictions["sem_seg"].argmax(
182 | dim=0).to(self.cpu_device)
183 | )
184 | if "instances" in predictions:
185 | instances = predictions["instances"].to(self.cpu_device)
186 | vis_output = visualizer.draw_instance_predictions(
187 | predictions=instances)
188 |
189 | return all_predictions, vis_output
190 |
191 |
192 | class BatchPredictor:
193 | """
194 | Create a simple end-to-end predictor with the given config that runs on
195 | single device for a list of input images.
196 |
197 | Compared to using the model directly, this class does the following
198 | additions:
199 |
200 | 1. Load checkpoint from `cfg.MODEL.WEIGHTS`.
201 | 2. Always take BGR image as the input and apply conversion defined by
202 | `cfg.INPUT.FORMAT`.
203 | 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`.
204 | 4. Take a list of input images
205 |
206 | Attributes:
207 | metadata (Metadata): the metadata of the underlying dataset, obtained
208 | from cfg.DATASETS.TEST.
209 |
210 | """
211 |
212 | def __init__(self, cfg):
213 | self.cfg = cfg.clone() # cfg can be modified by model
214 | self.model = build_model(self.cfg)
215 | self.model.eval()
216 | self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0])
217 |
218 | checkpointer = DetectionCheckpointer(self.model)
219 | checkpointer.load(cfg.MODEL.WEIGHTS)
220 |
221 | self.input_format = cfg.INPUT.FORMAT
222 | assert self.input_format in ["RGB", "BGR"], self.input_format
223 |
224 | def __call__(self, image_list):
225 | """
226 | Args:
227 | image_list (list of np.ndarray): a list of images of
228 | shape (H, W, C) (in BGR order).
229 |
230 | Returns:
231 | predictions (dict):
232 | the output of the model for all images.
233 | See :doc:`/tutorials/models` for details about the format.
234 | """
235 | inputs = []
236 | for original_image in image_list:
237 | # https://github.com/sphinx-doc/sphinx/issues/4258
238 | # Apply pre-processing to image.
239 | if self.input_format == "RGB":
240 | # whether the model expects BGR inputs or RGB
241 | original_image = original_image[:, :, ::-1]
242 | height, width = original_image.shape[:2]
243 | image = original_image
244 | image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
245 |
246 | instance = {"image": image, "height": height, "width": width}
247 |
248 | inputs.append(instance)
249 |
250 | with torch.no_grad():
251 | predictions = self.model(inputs)
252 | return predictions
253 |
--------------------------------------------------------------------------------
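
`SemanticPredMaskRCNN.get_prediction` above flattens Mask-RCNN instance predictions into a fixed `(H, W, 15 + 1)` tensor: each goal category in `coco_categories_mapping` owns one channel, and all instances of that category are summed into it. A toy reconstruction with fake masks (no detectron2 required; the shapes and mapping follow the code above):

```python
import numpy as np

H, W = 4, 4
coco_categories_mapping = {56: 0, 57: 1}     # chair, couch (subset of constants.py)

# Pretend Mask-RCNN detected two chairs and a couch, as binary masks:
pred_classes = [56, 57, 56]
pred_masks = [np.random.rand(H, W) > 0.5 for _ in pred_classes]

semantic_input = np.zeros((H, W, 15 + 1))    # 15 goal categories + 1 spare channel
for class_idx, obj_mask in zip(pred_classes, pred_masks):
    if class_idx in coco_categories_mapping:
        idx = coco_categories_mapping[class_idx]
        semantic_input[:, :, idx] += obj_mask * 1.  # same-category instances share a channel

print(semantic_input.shape)           # (4, 4, 16)
print(semantic_input[:, :, 0].max())  # at most 2.0 (the two chair masks may overlap)
```
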
/agents/utils/visualization.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 |
5 | def get_contour_points(pos, origin, size=20):
6 | x, y, o = pos
7 | pt1 = (int(x) + origin[0],
8 | int(y) + origin[1])
9 | pt2 = (int(x + size / 1.5 * np.cos(o + np.pi * 4 / 3)) + origin[0],
10 | int(y + size / 1.5 * np.sin(o + np.pi * 4 / 3)) + origin[1])
11 | pt3 = (int(x + size * np.cos(o)) + origin[0],
12 | int(y + size * np.sin(o)) + origin[1])
13 | pt4 = (int(x + size / 1.5 * np.cos(o - np.pi * 4 / 3)) + origin[0],
14 | int(y + size / 1.5 * np.sin(o - np.pi * 4 / 3)) + origin[1])
15 |
16 | return np.array([pt1, pt2, pt3, pt4])
17 |
18 |
19 | def draw_line(start, end, mat, steps=25, w=1):
20 | for i in range(steps + 1):
21 | x = int(np.rint(start[0] + (end[0] - start[0]) * i / steps))
22 | y = int(np.rint(start[1] + (end[1] - start[1]) * i / steps))
23 | mat[x - w:x + w, y - w:y + w] = 1
24 | return mat
25 |
26 |
27 | def init_vis_image(goal_name, legend):
28 | vis_image = np.ones((655, 1165, 3)).astype(np.uint8) * 255
29 | font = cv2.FONT_HERSHEY_SIMPLEX
30 | fontScale = 1
31 | color = (20, 20, 20) # BGR
32 | thickness = 2
33 |
34 | text = "Observations (Goal: {})".format(goal_name)
35 | textsize = cv2.getTextSize(text, font, fontScale, thickness)[0]
36 | textX = (640 - textsize[0]) // 2 + 15
37 | textY = (50 + textsize[1]) // 2
38 | vis_image = cv2.putText(vis_image, text, (textX, textY),
39 | font, fontScale, color, thickness,
40 | cv2.LINE_AA)
41 |
42 | text = "Predicted Semantic Map"
43 | textsize = cv2.getTextSize(text, font, fontScale, thickness)[0]
44 | textX = 640 + (480 - textsize[0]) // 2 + 30
45 | textY = (50 + textsize[1]) // 2
46 | vis_image = cv2.putText(vis_image, text, (textX, textY),
47 | font, fontScale, color, thickness,
48 | cv2.LINE_AA)
49 |
50 | # draw outlines
51 | color = [100, 100, 100]
52 | vis_image[49, 15:655] = color
53 | vis_image[49, 670:1150] = color
54 | vis_image[50:530, 14] = color
55 | vis_image[50:530, 655] = color
56 | vis_image[50:530, 669] = color
57 | vis_image[50:530, 1150] = color
58 | vis_image[530, 15:655] = color
59 | vis_image[530, 670:1150] = color
60 |
61 | # draw legend
62 | lx, ly, _ = legend.shape
63 | vis_image[537:537 + lx, 155:155 + ly, :] = legend
64 |
65 | return vis_image
66 |
--------------------------------------------------------------------------------
/algo/__init__.py:
--------------------------------------------------------------------------------
1 | from .ppo import PPO
2 |
--------------------------------------------------------------------------------
/algo/ppo.py:
--------------------------------------------------------------------------------
1 | # The following code is largely borrowed from:
2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/algo/ppo.py
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.optim as optim
7 |
8 |
9 | class PPO():
10 |
11 | def __init__(
12 | self,
13 | actor_critic,
14 | clip_param,
15 | ppo_epoch,
16 | num_mini_batch,
17 | value_loss_coef,
18 | entropy_coef,
19 | lr=None,
20 | eps=None,
21 | max_grad_norm=None,
22 | use_clipped_value_loss=True):
23 |
24 | self.actor_critic = actor_critic
25 |
26 | self.clip_param = clip_param
27 | self.ppo_epoch = ppo_epoch
28 | self.num_mini_batch = num_mini_batch
29 |
30 | self.value_loss_coef = value_loss_coef
31 | self.entropy_coef = entropy_coef
32 |
33 | self.max_grad_norm = max_grad_norm
34 | self.use_clipped_value_loss = use_clipped_value_loss
35 |
36 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad,
37 | actor_critic.parameters()),
38 | lr=lr, eps=eps)
39 |
40 | def update(self, rollouts):
41 | advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1]
42 | advantages = (advantages - advantages.mean()) / (
43 | advantages.std() + 1e-5)
44 |
45 | value_loss_epoch = 0
46 | action_loss_epoch = 0
47 | dist_entropy_epoch = 0
48 |
49 | for _ in range(self.ppo_epoch):
50 |
51 | if self.actor_critic.is_recurrent:
52 | data_generator = rollouts.recurrent_generator(
53 | advantages, self.num_mini_batch)
54 | else:
55 | data_generator = rollouts.feed_forward_generator(
56 | advantages, self.num_mini_batch)
57 |
58 | for sample in data_generator:
59 |
60 | value_preds = sample['value_preds']
61 | returns = sample['returns']
62 | adv_targ = sample['adv_targ']
63 |
64 | # Reshape to do in a single forward pass for all steps
65 | values, action_log_probs, dist_entropy, _ = \
66 | self.actor_critic.evaluate_actions(
67 | sample['obs'], sample['rec_states'],
68 | sample['masks'], sample['actions'],
69 | extras=sample['extras']
70 | )
71 |
72 | ratio = torch.exp(action_log_probs -
73 | sample['old_action_log_probs'])
74 | surr1 = ratio * adv_targ
75 | surr2 = torch.clamp(ratio, 1.0 - self.clip_param,
76 | 1.0 + self.clip_param) * adv_targ
77 | action_loss = -torch.min(surr1, surr2).mean()
78 |
79 | if self.use_clipped_value_loss:
80 | value_pred_clipped = value_preds + \
81 | (values - value_preds).clamp(
82 | -self.clip_param, self.clip_param)
83 | value_losses = (values - returns).pow(2)
84 | value_losses_clipped = (value_pred_clipped
85 | - returns).pow(2)
86 | value_loss = .5 * torch.max(value_losses,
87 | value_losses_clipped).mean()
88 | else:
89 | value_loss = 0.5 * (returns - values).pow(2).mean()
90 |
91 | self.optimizer.zero_grad()
92 | (value_loss * self.value_loss_coef + action_loss -
93 | dist_entropy * self.entropy_coef).backward()
94 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(),
95 | self.max_grad_norm)
96 | self.optimizer.step()
97 |
98 | value_loss_epoch += value_loss.item()
99 | action_loss_epoch += action_loss.item()
100 | dist_entropy_epoch += dist_entropy.item()
101 |
102 | num_updates = self.ppo_epoch * self.num_mini_batch
103 |
104 | value_loss_epoch /= num_updates
105 | action_loss_epoch /= num_updates
106 | dist_entropy_epoch /= num_updates
107 |
108 | return value_loss_epoch, action_loss_epoch, dist_entropy_epoch
109 |
--------------------------------------------------------------------------------
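
The core of `update` is PPO's clipped surrogate objective: the probability ratio between the new and old policies is clamped to `[1 - clip_param, 1 + clip_param]`, and the pessimistic minimum of the clipped and unclipped objectives is maximized. A tiny standalone numeric check of that objective (the values are made up):

```python
import torch

clip_param = 0.2
adv_targ = torch.tensor([1.0, -1.0])  # one positive, one negative advantage
log_ratio = torch.tensor([0.5, 0.5])  # new log-prob minus old log-prob

ratio = torch.exp(log_ratio)          # ~1.649, outside [0.8, 1.2]
surr1 = ratio * adv_targ              # unclipped objective
surr2 = torch.clamp(ratio, 1.0 - clip_param,
                    1.0 + clip_param) * adv_targ  # clipped objective
action_loss = -torch.min(surr1, surr2).mean()

# Positive advantage: the clipped term (1.2) caps the gain.
# Negative advantage: the unclipped term (~-1.649) is the minimum.
print(action_loss)  # -(1.2 - 1.649) / 2 ~= 0.225
```
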
/arguments.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch
3 |
4 |
5 | def get_args():
6 | parser = argparse.ArgumentParser(
7 | description='Goal-Oriented-Semantic-Exploration')
8 |
9 | # General Arguments
10 | parser.add_argument('--seed', type=int, default=1,
11 | help='random seed (default: 1)')
12 | parser.add_argument('--auto_gpu_config', type=int, default=1)
13 | parser.add_argument('--total_num_scenes', type=str, default="auto")
14 | parser.add_argument('-n', '--num_processes', type=int, default=5,
15 | help="""how many training processes to use (default:5)
16 | Overridden when auto_gpu_config=1
17 | and training on gpus""")
18 | parser.add_argument('--num_processes_per_gpu', type=int, default=6)
19 | parser.add_argument('--num_processes_on_first_gpu', type=int, default=1)
20 | parser.add_argument('--eval', type=int, default=0,
21 | help='0: Train, 1: Evaluate (default: 0)')
22 | parser.add_argument('--num_training_frames', type=int, default=10000000,
23 | help='total number of training frames')
24 | parser.add_argument('--num_eval_episodes', type=int, default=200,
25 | help="number of test episodes per scene")
26 | parser.add_argument('--num_train_episodes', type=int, default=10000,
27 | help="""number of train episodes per scene
28 | before loading the next scene""")
29 | parser.add_argument('--no_cuda', action='store_true', default=False,
30 | help='disables CUDA training')
31 | parser.add_argument("--sim_gpu_id", type=int, default=0,
32 | help="gpu id on which scenes are loaded")
33 | parser.add_argument("--sem_gpu_id", type=int, default=-1,
34 | help="""gpu id for semantic model,
35 | -1: same as sim gpu, -2: cpu""")
36 |
37 | # Logging, loading models, visualization
38 | parser.add_argument('--log_interval', type=int, default=10,
39 | help="""log interval, one log per n updates
40 | (default: 10) """)
41 | parser.add_argument('--save_interval', type=int, default=1,
42 | help="""save interval""")
43 | parser.add_argument('-d', '--dump_location', type=str, default="./tmp/",
44 | help='path to dump models and log (default: ./tmp/)')
45 | parser.add_argument('--exp_name', type=str, default="exp1",
46 | help='experiment name (default: exp1)')
47 | parser.add_argument('--save_periodic', type=int, default=500000,
48 | help='Model save frequency in number of updates')
49 | parser.add_argument('--load', type=str, default="0",
50 | help="""model path to load,
51 | 0 to not reload (default: 0)""")
52 | parser.add_argument('-v', '--visualize', type=int, default=0,
53 | help="""1: Render the observation and
54 | the predicted semantic map,
55 | 2: Render the observation with semantic
56 | predictions and the predicted semantic map
57 | (default: 0)""")
58 | parser.add_argument('--print_images', type=int, default=0,
59 | help='1: save visualization as images')
60 |
61 | # Environment, dataset and episode specifications
62 | parser.add_argument('-efw', '--env_frame_width', type=int, default=640,
63 | help='Frame width (default:640)')
64 | parser.add_argument('-efh', '--env_frame_height', type=int, default=480,
65 | help='Frame height (default:480)')
66 | parser.add_argument('-fw', '--frame_width', type=int, default=160,
67 | help='Frame width (default:160)')
68 | parser.add_argument('-fh', '--frame_height', type=int, default=120,
69 | help='Frame height (default:120)')
70 | parser.add_argument('-el', '--max_episode_length', type=int, default=500,
71 | help="""Maximum episode length""")
72 | parser.add_argument("--task_config", type=str,
73 | default="tasks/objectnav_gibson.yaml",
74 | help="path to config yaml containing task information")
75 | parser.add_argument("--split", type=str, default="train",
76 | help="dataset split (train | val | val_mini) ")
77 | parser.add_argument('--camera_height', type=float, default=0.88,
78 | help="agent camera height in metres")
79 | parser.add_argument('--hfov', type=float, default=79.0,
80 | help="horizontal field of view in degrees")
81 | parser.add_argument('--turn_angle', type=float, default=30,
82 | help="Agent turn angle in degrees")
83 | parser.add_argument('--min_depth', type=float, default=0.5,
84 | help="Minimum depth for depth sensor in meters")
85 | parser.add_argument('--max_depth', type=float, default=5.0,
86 | help="Maximum depth for depth sensor in meters")
87 | parser.add_argument('--success_dist', type=float, default=1.0,
88 | help="success distance threshold in meters")
89 | parser.add_argument('--floor_thr', type=int, default=50,
90 | help="floor threshold in cm")
91 | parser.add_argument('--min_d', type=float, default=1.5,
92 | help="min distance to goal during training in meters")
93 | parser.add_argument('--max_d', type=float, default=100.0,
94 | help="max distance to goal during training in meters")
95 | parser.add_argument('--version', type=str, default="v1.1",
96 | help="dataset version")
97 |
98 | # Model Hyperparameters
99 | parser.add_argument('--agent', type=str, default="sem_exp")
100 | parser.add_argument('--lr', type=float, default=2.5e-5,
101 | help='learning rate (default: 2.5e-5)')
102 | parser.add_argument('--global_hidden_size', type=int, default=256,
103 | help='global_hidden_size')
104 | parser.add_argument('--eps', type=float, default=1e-5,
105 | help='RL Optimizer epsilon (default: 1e-5)')
106 | parser.add_argument('--alpha', type=float, default=0.99,
107 | help='RL Optimizer alpha (default: 0.99)')
108 | parser.add_argument('--gamma', type=float, default=0.99,
109 | help='discount factor for rewards (default: 0.99)')
110 | parser.add_argument('--use_gae', action='store_true', default=False,
111 | help='use generalized advantage estimation')
112 | parser.add_argument('--tau', type=float, default=0.95,
113 | help='gae parameter (default: 0.95)')
114 | parser.add_argument('--entropy_coef', type=float, default=0.001,
115 |                         help='entropy term coefficient (default: 0.001)')
116 | parser.add_argument('--value_loss_coef', type=float, default=0.5,
117 | help='value loss coefficient (default: 0.5)')
118 | parser.add_argument('--max_grad_norm', type=float, default=0.5,
119 | help='max norm of gradients (default: 0.5)')
120 | parser.add_argument('--num_global_steps', type=int, default=20,
121 |                         help='number of forward steps in A2C (default: 20)')
122 | parser.add_argument('--ppo_epoch', type=int, default=4,
123 | help='number of ppo epochs (default: 4)')
124 | parser.add_argument('--num_mini_batch', type=str, default="auto",
125 |                         help='number of mini batches for ppo (default: auto)')
126 | parser.add_argument('--clip_param', type=float, default=0.2,
127 | help='ppo clip parameter (default: 0.2)')
128 | parser.add_argument('--use_recurrent_global', type=int, default=0,
129 | help='use a recurrent global policy')
130 | parser.add_argument('--num_local_steps', type=int, default=25,
131 |                         help="""Number of local policy steps
132 | between each global step""")
133 | parser.add_argument('--reward_coeff', type=float, default=0.1,
134 | help="Object goal reward coefficient")
135 | parser.add_argument('--intrinsic_rew_coeff', type=float, default=0.02,
136 | help="intrinsic exploration reward coefficient")
137 | parser.add_argument('--num_sem_categories', type=float, default=16)
138 | parser.add_argument('--sem_pred_prob_thr', type=float, default=0.9,
139 | help="Semantic prediction confidence threshold")
140 |
141 | # Mapping
142 | parser.add_argument('--global_downscaling', type=int, default=2)
143 | parser.add_argument('--vision_range', type=int, default=100)
144 | parser.add_argument('--map_resolution', type=int, default=5)
145 | parser.add_argument('--du_scale', type=int, default=1)
146 | parser.add_argument('--map_size_cm', type=int, default=2400)
147 | parser.add_argument('--cat_pred_threshold', type=float, default=5.0)
148 | parser.add_argument('--map_pred_threshold', type=float, default=1.0)
149 | parser.add_argument('--exp_pred_threshold', type=float, default=1.0)
150 | parser.add_argument('--collision_threshold', type=float, default=0.20)
151 |
152 | # parse arguments
153 | args = parser.parse_args()
154 |
155 | args.cuda = not args.no_cuda and torch.cuda.is_available()
156 |
157 | if args.cuda:
158 | if args.auto_gpu_config:
159 | num_gpus = torch.cuda.device_count()
160 | if args.total_num_scenes != "auto":
161 | args.total_num_scenes = int(args.total_num_scenes)
162 | elif "objectnav_gibson" in args.task_config and \
163 | "train" in args.split:
164 | args.total_num_scenes = 25
165 | elif "objectnav_gibson" in args.task_config and \
166 | "val" in args.split:
167 | args.total_num_scenes = 5
168 | else:
169 | assert False, "Unknown task config, please specify" + \
170 | " total_num_scenes"
171 |
172 | # GPU Memory required for the SemExp model:
173 | # 0.8 + 0.4 * args.total_num_scenes (GB)
174 | # GPU Memory required per thread: 2.6 (GB)
175 | min_memory_required = max(0.8 + 0.4 * args.total_num_scenes, 2.6)
176 | # Automatically configure number of training threads based on
177 | # number of GPUs available and GPU memory size
178 | gpu_memory = 1000
179 | for i in range(num_gpus):
180 | gpu_memory = min(gpu_memory,
181 | torch.cuda.get_device_properties(
182 | i).total_memory
183 | / 1024 / 1024 / 1024)
184 | assert gpu_memory > min_memory_required, \
185 | """Insufficient GPU memory for GPU {}, gpu memory ({}GB)
186 | needs to be greater than {}GB""".format(
187 | i, gpu_memory, min_memory_required)
188 |
189 | num_processes_per_gpu = int(gpu_memory / 2.6)
190 | num_processes_on_first_gpu = \
191 | int((gpu_memory - min_memory_required) / 2.6)
192 |
193 | if args.eval:
194 | max_threads = num_processes_per_gpu * (num_gpus - 1) \
195 | + num_processes_on_first_gpu
196 | assert max_threads >= args.total_num_scenes, \
197 | """Insufficient GPU memory for evaluation"""
198 |
199 | if num_gpus == 1:
200 | args.num_processes_on_first_gpu = num_processes_on_first_gpu
201 | args.num_processes_per_gpu = 0
202 | args.num_processes = num_processes_on_first_gpu
203 | assert args.num_processes > 0, "Insufficient GPU memory"
204 | else:
205 | num_threads = num_processes_per_gpu * (num_gpus - 1) \
206 | + num_processes_on_first_gpu
207 | num_threads = min(num_threads, args.total_num_scenes)
208 | args.num_processes_per_gpu = num_processes_per_gpu
209 | args.num_processes_on_first_gpu = max(
210 | 0,
211 | num_threads - args.num_processes_per_gpu * (num_gpus - 1))
212 | args.num_processes = num_threads
213 |
214 | args.sim_gpu_id = 1
215 |
216 | print("Auto GPU config:")
217 | print("Number of processes: {}".format(args.num_processes))
218 | print("Number of processes on GPU 0: {}".format(
219 | args.num_processes_on_first_gpu))
220 | print("Number of processes per GPU: {}".format(
221 | args.num_processes_per_gpu))
222 | else:
223 | args.sem_gpu_id = -2
224 |
225 | if args.num_mini_batch == "auto":
226 | args.num_mini_batch = max(args.num_processes // 2, 1)
227 | else:
228 | args.num_mini_batch = int(args.num_mini_batch)
229 |
230 | return args
231 |
--------------------------------------------------------------------------------
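
The auto GPU configuration above budgets 2.6 GB of GPU memory per thread and `0.8 + 0.4 * total_num_scenes` GB for the SemExp model on GPU 0. A worked example for a hypothetical machine with two 24 GB GPUs on the Gibson train split, following the formulas in `get_args`:

```python
gpu_memory = 24        # GB per GPU (hypothetical)
num_gpus = 2
total_num_scenes = 25  # objectnav_gibson train split

min_memory_required = max(0.8 + 0.4 * total_num_scenes, 2.6)  # 10.8 GB for the model
num_processes_per_gpu = int(gpu_memory / 2.6)                 # 9 threads on GPUs 1..N-1
num_processes_on_first_gpu = \
    int((gpu_memory - min_memory_required) / 2.6)             # 5: GPU 0 also hosts the model

num_threads = min(num_processes_per_gpu * (num_gpus - 1)
                  + num_processes_on_first_gpu,               # 9 + 5 = 14
                  total_num_scenes)
print(num_threads)  # 14 training threads
```
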
/configs/Base-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | NAME: "build_resnet_fpn_backbone"
5 | RESNETS:
6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
7 | FPN:
8 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
9 | ANCHOR_GENERATOR:
10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
12 | RPN:
13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level
16 | # Detectron1 uses 2000 proposals per-batch,
17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 | POST_NMS_TOPK_TRAIN: 1000
20 | POST_NMS_TOPK_TEST: 1000
21 | ROI_HEADS:
22 | NAME: "StandardROIHeads"
23 | IN_FEATURES: ["p2", "p3", "p4", "p5"]
24 | ROI_BOX_HEAD:
25 | NAME: "FastRCNNConvFCHead"
26 | NUM_FC: 2
27 | POOLER_RESOLUTION: 7
28 | ROI_MASK_HEAD:
29 | NAME: "MaskRCNNConvUpsampleHead"
30 | NUM_CONV: 4
31 | POOLER_RESOLUTION: 14
32 | DATASETS:
33 | TRAIN: ("coco_2017_train",)
34 | TEST: ("coco_2017_val",)
35 | SOLVER:
36 | IMS_PER_BATCH: 16
37 | BASE_LR: 0.02
38 | STEPS: (60000, 80000)
39 | MAX_ITER: 90000
40 | INPUT:
41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
42 | VERSION: 2
43 |
--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | scenes = {}
2 | scenes["train"] = [
3 | 'Allensville',
4 | 'Beechwood',
5 | 'Benevolence',
6 | 'Coffeen',
7 | 'Cosmos',
8 | 'Forkland',
9 | 'Hanson',
10 | 'Hiteman',
11 | 'Klickitat',
12 | 'Lakeville',
13 | 'Leonardo',
14 | 'Lindenwood',
15 | 'Marstons',
16 | 'Merom',
17 | 'Mifflinburg',
18 | 'Newfields',
19 | 'Onaga',
20 | 'Pinesdale',
21 | 'Pomaria',
22 | 'Ranchester',
23 | 'Shelbyville',
24 | 'Stockman',
25 | 'Tolstoy',
26 | 'Wainscott',
27 | 'Woodbine',
28 | ]
29 |
30 | scenes["val"] = [
31 | 'Collierville',
32 | 'Corozal',
33 | 'Darden',
34 | 'Markleeville',
35 | 'Wiconisco',
36 | ]
37 |
38 | coco_categories = {
39 | "chair": 0,
40 | "couch": 1,
41 | "potted plant": 2,
42 | "bed": 3,
43 | "toilet": 4,
44 | "tv": 5,
45 | "dining-table": 6,
46 | "oven": 7,
47 | "sink": 8,
48 | "refrigerator": 9,
49 | "book": 10,
50 | "clock": 11,
51 | "vase": 12,
52 | "cup": 13,
53 | "bottle": 14
54 | }
55 |
56 | coco_categories_mapping = {
57 | 56: 0, # chair
58 | 57: 1, # couch
59 | 58: 2, # potted plant
60 | 59: 3, # bed
61 | 61: 4, # toilet
62 | 62: 5, # tv
63 | 60: 6, # dining-table
64 | 69: 7, # oven
65 | 71: 8, # sink
66 | 72: 9, # refrigerator
67 | 73: 10, # book
68 | 74: 11, # clock
69 | 75: 12, # vase
70 | 41: 13, # cup
71 | 39: 14, # bottle
72 | }
73 |
74 | color_palette = [
75 | 1.0, 1.0, 1.0,
76 | 0.6, 0.6, 0.6,
77 | 0.95, 0.95, 0.95,
78 | 0.96, 0.36, 0.26,
79 | 0.12156862745098039, 0.47058823529411764, 0.7058823529411765,
80 | 0.9400000000000001, 0.7818, 0.66,
81 | 0.9400000000000001, 0.8868, 0.66,
82 | 0.8882000000000001, 0.9400000000000001, 0.66,
83 | 0.7832000000000001, 0.9400000000000001, 0.66,
84 | 0.6782000000000001, 0.9400000000000001, 0.66,
85 | 0.66, 0.9400000000000001, 0.7468000000000001,
86 | 0.66, 0.9400000000000001, 0.8518000000000001,
87 | 0.66, 0.9232, 0.9400000000000001,
88 | 0.66, 0.8182, 0.9400000000000001,
89 | 0.66, 0.7132, 0.9400000000000001,
90 | 0.7117999999999999, 0.66, 0.9400000000000001,
91 | 0.8168, 0.66, 0.9400000000000001,
92 | 0.9218, 0.66, 0.9400000000000001,
93 | 0.9400000000000001, 0.66, 0.8531999999999998,
94 | 0.9400000000000001, 0.66, 0.748199999999999]
95 |
--------------------------------------------------------------------------------
/docs/DOCKER_INSTRUCTIONS.md:
--------------------------------------------------------------------------------
1 | # Docker and Singularity Instructions
2 | We provide experimental [Docker](https://www.docker.com/) and [Singularity](https://sylabs.io/) images with all the dependencies installed.
3 |
4 | Before setting up the Docker image, clone the code:
5 | ```
6 | git clone https://github.com/devendrachaplot/Object-Goal-Navigation/
7 | cd Object-Goal-Navigation/;
8 | ```
9 | Download and set up the scene and episode datasets as described [here](../README.md#setup).
10 |
11 | For Docker, either build the image using the provided [Dockerfile](../Dockerfile):
12 | ```
13 | docker build -t devendrachaplot/habitat:sem_exp .
14 | ```
15 | Or pull the pre-built image from Docker Hub:
16 | ```
17 | docker pull devendrachaplot/habitat:sem_exp
18 | ```
19 |
20 | After building or pulling the image, start a container using:
21 | ```
22 | docker run -v $(pwd)/:/code -v $(pwd)/data:/code/data --runtime=nvidia -it devendrachaplot/habitat:sem_exp
23 | ```
24 |
25 | Inside the container, check Habitat's compatibility with your system:
26 | ```
27 | cd /habitat-api/
28 | python examples/benchmark.py
29 | ```
30 |
31 | To run the SemExp model inside the container, `cd /code/` and run the same commands as described in [INSTRUCTIONS](./INSTRUCTIONS.md).
32 |
33 | To pull the Singularity image:
34 | ```
35 | singularity pull docker://devendrachaplot/habitat:sem_exp
36 | ```
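Singularity saves the pulled image as a `.sif` file, named `habitat_sem_exp.sif` under its default naming convention. A typical invocation with GPU support and the same mounts as the Docker command above (the image name and bind paths here are assumptions, adjust them to your setup) is:
```
singularity exec --nv -B $(pwd)/:/code -B $(pwd)/data:/code/data habitat_sem_exp.sif bash
```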
--------------------------------------------------------------------------------
/docs/INSTRUCTIONS.md:
--------------------------------------------------------------------------------
1 | # Instructions
2 |
3 | ## Training
4 | For training the SemExp model on the Object Goal Navigation task:
5 | ```
6 | python main.py
7 | ```
8 |
9 | ### Specifying number of threads
10 | The code runs multiple parallel threads for training, where each thread loads a scene on a GPU. By default, the code automatically decides the total number of threads and the number of threads on each GPU based on the available GPUs and their memory.
11 |
12 | If you would like to disable the auto GPU config, you need to specify the following:
13 | ```
14 | --auto_gpu_config 0
15 | -n, --num_processes NUM_PROCESSES
16 | --num_processes_per_gpu NUM_PROCESSES_PER_GPU
17 | --num_processes_on_first_gpu NUM_PROCESSES_ON_FIRST_GPU
18 | ```
19 | `NUM_PROCESSES_PER_GPU` depends on your GPU memory; 6 works well for 16GB GPUs.
20 | `NUM_PROCESSES_ON_FIRST_GPU` specifies the number of processes on the first GPU in addition to the SemExp model; 1 works well for 16GB GPUs.
21 | `NUM_PROCESSES` depends on the number of GPUs used for training and `NUM_PROCESSES_PER_GPU`, such that
22 | ```
23 | NUM_PROCESSES <= min(NUM_PROCESSES_PER_GPU * (number of GPUs - 1) + NUM_PROCESSES_ON_FIRST_GPU, 25)
24 | ```
25 | The Gibson training set consists of 25 scenes.
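To make the constraint concrete, here is a minimal sketch (not part of the codebase) mirroring the arithmetic of the auto GPU config in `arguments.py`:
```
# Hypothetical helper mirroring the auto GPU config in arguments.py.
# With --sim_gpu_id 1, simulator threads run on GPUs 1..(num_gpus - 1),
# plus a few extra threads alongside the SemExp model on GPU 0.
def max_num_processes(num_gpus, per_gpu, on_first_gpu, total_scenes=25):
    threads = per_gpu * (num_gpus - 1) + on_first_gpu
    return min(threads, total_scenes)

print(max_num_processes(num_gpus=5, per_gpu=6, on_first_gpu=1))  # 25
```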
26 |
27 | For example, for training the model on 5 GPUs with 16GB memory per GPU:
28 | ```
29 | python main.py --auto_gpu_config 0 -n 25 --num_processes_per_gpu 6 --num_processes_on_first_gpu 1 --sim_gpu_id 1
30 | ```
31 | Here, `sim_gpu_id = 1` specifies that simulator threads run on GPUs 1 onwards.
32 | Each GPU from 1 to 4 will run 6 threads each, and GPU 0 will run 1 thread and
33 | the SemExp model.
34 |
35 | ### Specifying log location, periodic model dumps
36 | ```
37 | python main.py -d saved/ --exp_name exp1 --save_periodic 500000
38 | ```
39 | The above will save the best model files and training log at `saved/models/exp1/` and save all models periodically every 500000 steps at `saved/dump/exp1/`. Each module will be saved in a separate file.
40 |
41 | ### Hyper-parameters
42 | Most of the default hyper-parameters should work fine. Some hyper-parameters are set for training with 25 threads and might need to be tuned when using fewer threads: fewer threads lead to a smaller batch size, so the learning rate might need to be adjusted using `--lr`.
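One common heuristic (a rule of thumb, not a tuned recommendation for this codebase) is to scale the learning rate linearly with the number of threads:
```
# Illustrative linear-scaling heuristic; the base values are hypothetical.
base_lr, base_threads = 2.5e-5, 25
num_threads = 5
scaled_lr = base_lr * num_threads / base_threads
print(scaled_lr)  # 5e-06, passed to training via --lr
```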
43 |
44 | ## Downloading pre-trained models
45 | ```
46 | mkdir pretrained_models;
47 | wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=171ZA7XNu5vi3XLpuKs8DuGGZrYyuSjL0' -O pretrained_models/sem_exp.pth
48 | ```
49 |
50 | ## Evaluation
51 |
52 | The following are instructions to evaluate the model on the Gibson val set.
53 |
54 | For evaluating the pre-trained model:
55 | ```
56 | python main.py --split val --eval 1 --load pretrained_models/sem_exp.pth
57 | ```
58 |
59 | The pre-trained model should get 0.657 Success, 0.339 SPL and 1.474 DTG.
60 |
61 | ### Manual GPU config
62 |
63 | If you would like to disable the auto GPU config, specify the number of threads for evaluation using `--num_processes` and the number of evaluation episodes per thread using `--num_eval_episodes`.
64 | The Gibson val set consists of 5 scenes with 200 episodes per scene. Thus, we need 5 threads for evaluation, with 200 episodes per thread. Split the 5 scenes across GPUs based on their memory sizes. The code requires `0.8 + 0.4 * num_scenes` GB of GPU memory on the first GPU for the model and around 2.6GB of memory per scene.
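As a sanity check, the two example setups below can be derived from this estimate with a minimal sketch (not part of the codebase):
```
# Rough GPU 0 memory estimate (in GB) from the formula above.
def first_gpu_memory_gb(total_scenes, scenes_on_first_gpu):
    model = 0.8 + 0.4 * total_scenes    # SemExp model on GPU 0
    sims = 2.6 * scenes_on_first_gpu    # ~2.6 GB per scene thread
    return model + sims

print(first_gpu_memory_gb(5, 5))  # ~15.8, fits a single 16GB GPU
print(first_gpu_memory_gb(5, 1))  # ~5.4, GPU 0 in the 2x12GB setup below
```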
65 |
66 | For example, if you have 1 GPU with 16GB memory:
67 | ```
68 | python main.py --split val --eval 1 --auto_gpu_config 0 \
69 | -n 5 --num_eval_episodes 200 --num_processes_on_first_gpu 5 \
70 | --load pretrained_models/sem_exp.pth
71 | ```
72 | or if you have 2 GPUs with 12GB memory each:
73 | ```
74 | python main.py --split val --eval 1 --auto_gpu_config 0 \
75 | -n 5 --num_eval_episodes 200 --num_processes_on_first_gpu 1 \
76 | --num_processes_per_gpu 4 --sim_gpu_id 1 \
77 | --load pretrained_models/sem_exp.pth
78 | ```
79 |
80 | ### Visualization and printing images
81 | For visualizing the agent observations and the predicted map and pose, add `-v 1` as an argument to the above command. This requires a display attached to the system.
82 |
83 | To visualize on headless systems (without display), use `--print_images 1 -d results/ --exp_name exp1`. This will save the visualization images in `results/dump/exp1/episodes/`.
84 |
85 | Both `-v 1` and `--print_images 1` can be used together to visualize and print images at the same time.
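For example, to evaluate the pre-trained model while both visualizing and saving images, the flags above can be combined:
```
python main.py --split val --eval 1 --load pretrained_models/sem_exp.pth \
    -v 1 --print_images 1 -d results/ --exp_name exp1
```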
86 |
87 |
88 | ## Notes
89 |
90 | - Training the model for 10 million frames with 25 threads takes around 2.5 days on an Nvidia DGX-1 system using five 16GB GPUs, but the model achieves good performance even with only 1 million frames (~6 hrs) of training.
91 |
92 | - Evaluating the model on the val set for 1000 episodes with 5 threads takes around 2.5 hrs on an Nvidia DGX-1 system.
93 |
94 | - The code does not contain the Denoising Network described in our [paper](https://arxiv.org/pdf/2007.00643.pdf),
95 | for the following reasons:
96 |     - Training the Denoising Network requires downloading the original Gibson dataset (non-Habitat format) and the 3DSceneGraph dataset, and building Habitat-format semantic scenes using both datasets.
97 |     - Training the Denoising Network requires building and cleaning top-down maps, which makes training much slower.
98 |     - The first-person semantic annotations for Gibson are not perfectly accurate and do not align with the depth sensor, so the Denoising Network provides only a marginal performance improvement.
99 |
100 |
101 | ## Tips
102 | To silence the Habitat sim logs, add the following to your `~/.bashrc` (Linux) or `~/.bash_profile` (Mac):
103 | ```
104 | export GLOG_minloglevel=2
105 | export MAGNUM_LOG="quiet"
106 | ```
--------------------------------------------------------------------------------
/docs/example.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/example.gif
--------------------------------------------------------------------------------
/docs/legend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/legend.png
--------------------------------------------------------------------------------
/docs/overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/devendrachaplot/Object-Goal-Navigation/5d76902fe9be821926a1de32557ca9a8dc21d0f5/docs/overview.jpg
--------------------------------------------------------------------------------
/envs/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .habitat import construct_envs
4 |
5 |
6 | def make_vec_envs(args):
7 | envs = construct_envs(args)
8 | envs = VecPyTorch(envs, args.device)
9 | return envs
10 |
11 |
12 | # Adapted from
13 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/envs.py#L159
14 | class VecPyTorch:
15 |
16 | def __init__(self, venv, device):
17 | self.venv = venv
18 | self.num_envs = venv.num_envs
19 | self.observation_space = venv.observation_space
20 | self.action_space = venv.action_space
21 | self.device = device
22 |
23 | def reset(self):
24 | obs, info = self.venv.reset()
25 | obs = torch.from_numpy(obs).float().to(self.device)
26 | return obs, info
27 |
28 | def step_async(self, actions):
29 | actions = actions.cpu().numpy()
30 | self.venv.step_async(actions)
31 |
32 | def step_wait(self):
33 | obs, reward, done, info = self.venv.step_wait()
34 | obs = torch.from_numpy(obs).float().to(self.device)
35 | reward = torch.from_numpy(reward).float()
36 | return obs, reward, done, info
37 |
38 | def step(self, actions):
39 | actions = actions.cpu().numpy()
40 | obs, reward, done, info = self.venv.step(actions)
41 | obs = torch.from_numpy(obs).float().to(self.device)
42 | reward = torch.from_numpy(reward).float()
43 | return obs, reward, done, info
44 |
45 | def get_rewards(self, inputs):
46 | reward = self.venv.get_rewards(inputs)
47 | reward = torch.from_numpy(reward).float()
48 | return reward
49 |
50 | def plan_act_and_preprocess(self, inputs):
51 | obs, reward, done, info = self.venv.plan_act_and_preprocess(inputs)
52 | obs = torch.from_numpy(obs).float().to(self.device)
53 | reward = torch.from_numpy(reward).float()
54 | return obs, reward, done, info
55 |
56 | def close(self):
57 | return self.venv.close()
58 |
--------------------------------------------------------------------------------
/envs/habitat/__init__.py:
--------------------------------------------------------------------------------
1 | # Parts of the code in this file have been borrowed from:
2 | # https://github.com/facebookresearch/habitat-api
3 | import os
4 | import numpy as np
5 | import torch
6 | from habitat.config.default import get_config as cfg_env
7 | from habitat.datasets.pointnav.pointnav_dataset import PointNavDatasetV1
8 | from habitat import Config, Env, RLEnv, make_dataset
9 |
10 | from agents.sem_exp import Sem_Exp_Env_Agent
11 | from .objectgoal_env import ObjectGoal_Env
12 |
13 | from .utils.vector_env import VectorEnv
14 |
15 |
16 | def make_env_fn(args, config_env, rank):
17 | dataset = make_dataset(config_env.DATASET.TYPE, config=config_env.DATASET)
18 | config_env.defrost()
19 | config_env.SIMULATOR.SCENE = dataset.episodes[0].scene_id
20 | config_env.freeze()
21 |
22 | if args.agent == "sem_exp":
23 | env = Sem_Exp_Env_Agent(args=args, rank=rank,
24 | config_env=config_env,
25 | dataset=dataset
26 | )
27 | else:
28 | env = ObjectGoal_Env(args=args, rank=rank,
29 | config_env=config_env,
30 | dataset=dataset
31 | )
32 |
33 | env.seed(rank)
34 | return env
35 |
36 |
37 | def _get_scenes_from_folder(content_dir):
38 | scene_dataset_ext = ".glb.json.gz"
39 | scenes = []
40 | for filename in os.listdir(content_dir):
41 | if filename.endswith(scene_dataset_ext):
42 | scene = filename[: -len(scene_dataset_ext) + 4]  # strip ".json.gz", keep "<scene>.glb"
43 | scenes.append(scene)
44 | scenes.sort()
45 | return scenes
46 |
47 |
48 | def construct_envs(args):
49 | env_configs = []
50 | args_list = []
51 |
52 | basic_config = cfg_env(config_paths=["envs/habitat/configs/"
53 | + args.task_config])
54 | basic_config.defrost()
55 | basic_config.DATASET.SPLIT = args.split
56 | basic_config.DATASET.DATA_PATH = \
57 | basic_config.DATASET.DATA_PATH.replace("v1", args.version)
58 | basic_config.DATASET.EPISODES_DIR = \
59 | basic_config.DATASET.EPISODES_DIR.replace("v1", args.version)
60 | basic_config.freeze()
61 |
62 | scenes = basic_config.DATASET.CONTENT_SCENES
63 | if "*" in basic_config.DATASET.CONTENT_SCENES:
64 | content_dir = os.path.join(basic_config.DATASET.EPISODES_DIR.format(
65 | split=args.split), "content")
66 | scenes = _get_scenes_from_folder(content_dir)
67 |
68 | if len(scenes) > 0:
69 | assert len(scenes) >= args.num_processes, (
70 | "reduce the number of processes as there "
71 | "aren't enough number of scenes"
72 | )
73 |
74 | scene_split_sizes = [int(np.floor(len(scenes) / args.num_processes))
75 | for _ in range(args.num_processes)]
76 | for i in range(len(scenes) % args.num_processes):
77 | scene_split_sizes[i] += 1
78 |
79 | print("Scenes per thread:")
80 | for i in range(args.num_processes):
81 | config_env = cfg_env(config_paths=["envs/habitat/configs/"
82 | + args.task_config])
83 | config_env.defrost()
84 |
85 | if len(scenes) > 0:
86 | config_env.DATASET.CONTENT_SCENES = scenes[
87 | sum(scene_split_sizes[:i]):
88 | sum(scene_split_sizes[:i + 1])
89 | ]
90 | print("Thread {}: {}".format(i, config_env.DATASET.CONTENT_SCENES))
91 |
92 | if i < args.num_processes_on_first_gpu:
93 | gpu_id = 0
94 | else:
95 | gpu_id = int((i - args.num_processes_on_first_gpu)
96 | // args.num_processes_per_gpu) + args.sim_gpu_id
97 | gpu_id = min(torch.cuda.device_count() - 1, gpu_id)
98 | config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_id
99 |
100 | agent_sensors = []
101 | agent_sensors.append("RGB_SENSOR")
102 | agent_sensors.append("DEPTH_SENSOR")
103 | # agent_sensors.append("SEMANTIC_SENSOR")
104 |
105 | config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
106 |
107 | # Resetting episodes manually, so set a high max episode length in sim
108 | config_env.ENVIRONMENT.MAX_EPISODE_STEPS = 10000000
109 | config_env.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False
110 |
111 | config_env.SIMULATOR.RGB_SENSOR.WIDTH = args.env_frame_width
112 | config_env.SIMULATOR.RGB_SENSOR.HEIGHT = args.env_frame_height
113 | config_env.SIMULATOR.RGB_SENSOR.HFOV = args.hfov
114 | config_env.SIMULATOR.RGB_SENSOR.POSITION = [0, args.camera_height, 0]
115 |
116 | config_env.SIMULATOR.DEPTH_SENSOR.WIDTH = args.env_frame_width
117 | config_env.SIMULATOR.DEPTH_SENSOR.HEIGHT = args.env_frame_height
118 | config_env.SIMULATOR.DEPTH_SENSOR.HFOV = args.hfov
119 | config_env.SIMULATOR.DEPTH_SENSOR.MIN_DEPTH = args.min_depth
120 | config_env.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = args.max_depth
121 | config_env.SIMULATOR.DEPTH_SENSOR.POSITION = [0, args.camera_height, 0]
122 |
123 | # config_env.SIMULATOR.SEMANTIC_SENSOR.WIDTH = args.env_frame_width
124 | # config_env.SIMULATOR.SEMANTIC_SENSOR.HEIGHT = args.env_frame_height
125 | # config_env.SIMULATOR.SEMANTIC_SENSOR.HFOV = args.hfov
126 | # config_env.SIMULATOR.SEMANTIC_SENSOR.POSITION = \
127 | # [0, args.camera_height, 0]
128 |
129 | config_env.SIMULATOR.TURN_ANGLE = args.turn_angle
130 | config_env.DATASET.SPLIT = args.split
131 | config_env.DATASET.DATA_PATH = \
132 | config_env.DATASET.DATA_PATH.replace("v1", args.version)
133 | config_env.DATASET.EPISODES_DIR = \
134 | config_env.DATASET.EPISODES_DIR.replace("v1", args.version)
135 |
136 | config_env.freeze()
137 | env_configs.append(config_env)
138 |
139 | args_list.append(args)
140 |
141 | envs = VectorEnv(
142 | make_env_fn=make_env_fn,
143 | env_fn_args=tuple(
144 | tuple(
145 | zip(args_list, env_configs, range(args.num_processes))
146 | )
147 | ),
148 | )
149 |
150 | return envs
151 |
--------------------------------------------------------------------------------
/envs/habitat/configs/tasks/objectnav_gibson.yaml:
--------------------------------------------------------------------------------
1 | ENVIRONMENT:
2 | MAX_EPISODE_STEPS: 500
3 | SIMULATOR:
4 | TURN_ANGLE: 30
5 | TILT_ANGLE: 30
6 | ACTION_SPACE_CONFIG: "v1"
7 | AGENT_0:
8 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'SEMANTIC_SENSOR']
9 | HEIGHT: 0.88
10 | RADIUS: 0.18
11 | HABITAT_SIM_V0:
12 | GPU_DEVICE_ID: 0
13 | ALLOW_SLIDING: True
14 | SEMANTIC_SENSOR:
15 | WIDTH: 640
16 | HEIGHT: 480
17 | HFOV: 79
18 | POSITION: [0, 0.88, 0]
19 | RGB_SENSOR:
20 | WIDTH: 640
21 | HEIGHT: 480
22 | HFOV: 79
23 | POSITION: [0, 0.88, 0]
24 | DEPTH_SENSOR:
25 | WIDTH: 640
26 | HEIGHT: 480
27 | HFOV: 79
28 | MIN_DEPTH: 0.5
29 | MAX_DEPTH: 5.0
30 | POSITION: [0, 0.88, 0]
31 | TASK:
32 | TYPE: ObjectNav-v1
33 | POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"]
34 | SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR']
35 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL']
36 | SUCCESS:
37 | SUCCESS_DISTANCE: 0.2
38 |
39 | DATASET:
40 | TYPE: PointNav-v1
41 | SPLIT: train
42 | DATA_PATH: "data/datasets/objectnav/gibson/v1/{split}/{split}.json.gz"
43 | EPISODES_DIR: "data/datasets/objectnav/gibson/v1/{split}/"
44 | SCENES_DIR: "data/scene_datasets/"
45 |
--------------------------------------------------------------------------------
/envs/habitat/objectgoal_env.py:
--------------------------------------------------------------------------------
1 | import json
2 | import bz2
3 | import gzip
4 | import _pickle as cPickle
5 | import gym
6 | import numpy as np
7 | import quaternion
8 | import skimage.morphology
9 | import habitat
10 |
11 | from envs.utils.fmm_planner import FMMPlanner
12 | from constants import coco_categories
13 | import envs.utils.pose as pu
14 |
15 |
16 | class ObjectGoal_Env(habitat.RLEnv):
17 | """The Object Goal Navigation environment class. The class is responsible
18 | for loading the dataset, generating episodes, and computing evaluation
19 | metrics.
20 | """
21 |
22 | def __init__(self, args, rank, config_env, dataset):
23 | self.args = args
24 | self.rank = rank
25 |
26 | super().__init__(config_env, dataset)
27 |
28 | # Loading dataset info file
29 | self.split = config_env.DATASET.SPLIT
30 | self.episodes_dir = config_env.DATASET.EPISODES_DIR.format(
31 | split=self.split)
32 |
33 | dataset_info_file = self.episodes_dir + \
34 | "{split}_info.pbz2".format(split=self.split)
35 | with bz2.BZ2File(dataset_info_file, 'rb') as f:
36 | self.dataset_info = cPickle.load(f)
37 |
38 | # Specifying action and observation space
39 | self.action_space = gym.spaces.Discrete(3)
40 |
41 | self.observation_space = gym.spaces.Box(0, 255,
42 | (3, args.frame_height,
43 | args.frame_width),
44 | dtype='uint8')
45 |
46 | # Initializations
47 | self.episode_no = 0
48 |
49 | # Scene info
50 | self.last_scene_path = None
51 | self.scene_path = None
52 | self.scene_name = None
53 |
54 | # Episode Dataset info
55 | self.eps_data = None
56 | self.eps_data_idx = None
57 | self.gt_planner = None
58 | self.object_boundary = None
59 | self.goal_idx = None
60 | self.goal_name = None
61 | self.map_obj_origin = None
62 | self.starting_loc = None
63 | self.starting_distance = None
64 |
65 | # Episode tracking info
66 | self.curr_distance = None
67 | self.prev_distance = None
68 | self.timestep = None
69 | self.stopped = None
70 | self.path_length = None
71 | self.last_sim_location = None
72 | self.trajectory_states = []
73 | self.info = {}
74 | self.info['distance_to_goal'] = None
75 | self.info['spl'] = None
76 | self.info['success'] = None
77 |
78 | def load_new_episode(self):
79 | """The function loads a fixed episode from the episode dataset. This
80 | function is used for evaluating a trained model on the val split.
81 | """
82 |
83 | args = self.args
84 | self.scene_path = self.habitat_env.sim.config.SCENE
85 | scene_name = self.scene_path.split("/")[-1].split(".")[0]
86 |
87 | if self.scene_path != self.last_scene_path:
88 | episodes_file = self.episodes_dir + \
89 | "content/{}_episodes.json.gz".format(scene_name)
90 |
91 | print("Loading episodes from: {}".format(episodes_file))
92 | with gzip.open(episodes_file, 'r') as f:
93 | self.eps_data = json.loads(
94 | f.read().decode('utf-8'))["episodes"]
95 |
96 | self.eps_data_idx = 0
97 | self.last_scene_path = self.scene_path
98 |
99 | # Load episode info
100 | episode = self.eps_data[self.eps_data_idx]
101 | self.eps_data_idx += 1
102 | self.eps_data_idx = self.eps_data_idx % len(self.eps_data)
103 | pos = episode["start_position"]
104 | rot = quaternion.from_float_array(episode["start_rotation"])
105 |
106 | goal_name = episode["object_category"]
107 | goal_idx = episode["object_id"]
108 | floor_idx = episode["floor_id"]
109 |
110 | # Load scene info
111 | scene_info = self.dataset_info[scene_name]
112 | sem_map = scene_info[floor_idx]['sem_map']
113 | map_obj_origin = scene_info[floor_idx]['origin']
114 |
115 | # Setup ground truth planner
116 | object_boundary = args.success_dist
117 | map_resolution = args.map_resolution
118 | selem = skimage.morphology.disk(2)
119 | traversible = skimage.morphology.binary_dilation(
120 | sem_map[0], selem) != True
121 | traversible = 1 - traversible
122 | planner = FMMPlanner(traversible)
123 | selem = skimage.morphology.disk(
124 | int(object_boundary * 100. / map_resolution))
125 | goal_map = skimage.morphology.binary_dilation(
126 | sem_map[goal_idx + 1], selem) != True
127 | goal_map = 1 - goal_map
128 | planner.set_multi_goal(goal_map)
129 |
130 | # Get starting loc in GT map coordinates (20 map cells per meter)
131 | x = -pos[2]
132 | y = -pos[0]
133 | min_x, min_y = map_obj_origin / 100.0
134 | map_loc = int((-y - min_y) * 20.), int((-x - min_x) * 20.)
135 |
136 | self.gt_planner = planner
137 | self.starting_loc = map_loc
138 | self.object_boundary = object_boundary
139 | self.goal_idx = goal_idx
140 | self.goal_name = goal_name
141 | self.map_obj_origin = map_obj_origin
142 |
143 | self.starting_distance = self.gt_planner.fmm_dist[self.starting_loc]\
144 | / 20.0 + self.object_boundary
145 | self.prev_distance = self.starting_distance
146 | self._env.sim.set_agent_state(pos, rot)
147 |
148 | # The following two should match approximately
149 | # print(starting_loc)
150 | # print(self.sim_continuous_to_sim_map(self.get_sim_location()))
151 |
152 | obs = self._env.sim.get_observations_at(pos, rot)
153 |
154 | return obs
155 |
156 | def generate_new_episode(self):
157 | """The function generates a random valid episode. This function is used
158 | for training a model on the train split.
159 | """
160 |
161 | args = self.args
162 |
163 | self.scene_path = self.habitat_env.sim.config.SCENE
164 | scene_name = self.scene_path.split("/")[-1].split(".")[0]
165 |
166 | scene_info = self.dataset_info[scene_name]
167 | map_resolution = args.map_resolution
168 |
169 | floor_idx = np.random.randint(len(scene_info.keys()))
170 | floor_height = scene_info[floor_idx]['floor_height']
171 | sem_map = scene_info[floor_idx]['sem_map']
172 | map_obj_origin = scene_info[floor_idx]['origin']
173 |
174 | cat_counts = sem_map.sum(2).sum(1)
175 | possible_cats = list(np.arange(6))
176 |
177 | for i in range(6):
178 | if cat_counts[i + 1] == 0:
179 | possible_cats.remove(i)
180 |
181 | object_boundary = args.success_dist
182 |
183 | loc_found = False
184 | while not loc_found:
185 | if len(possible_cats) == 0:
186 | # No goal categories are present on this floor; fail fast
187 | # instead of retrying forever.
188 | raise RuntimeError("No valid objects for {}".format(floor_height))
189 |
190 | goal_idx = np.random.choice(possible_cats)
191 |
192 | for key, value in coco_categories.items():
193 | if value == goal_idx:
194 | goal_name = key
195 |
196 | selem = skimage.morphology.disk(2)
197 | traversible = skimage.morphology.binary_dilation(
198 | sem_map[0], selem) != True
199 | traversible = 1 - traversible
200 |
201 | planner = FMMPlanner(traversible)
202 |
203 | selem = skimage.morphology.disk(
204 | int(object_boundary * 100. / map_resolution))
205 | goal_map = skimage.morphology.binary_dilation(
206 | sem_map[goal_idx + 1], selem) != True
207 | goal_map = 1 - goal_map
208 |
209 | planner.set_multi_goal(goal_map)
210 |
211 | m1 = sem_map[0] > 0
212 | m2 = planner.fmm_dist > (args.min_d - object_boundary) * 20.0
213 | m3 = planner.fmm_dist < (args.max_d - object_boundary) * 20.0
214 |
215 | possible_starting_locs = np.logical_and(m1, m2)
216 | possible_starting_locs = np.logical_and(
217 | possible_starting_locs, m3) * 1.
218 | if possible_starting_locs.sum() != 0:
219 | loc_found = True
220 | else:
221 | print("Invalid object: {} / {} / {}".format(
222 | scene_name, floor_height, goal_name))
223 | possible_cats.remove(goal_idx)
224 | scene_info[floor_idx]["sem_map"][goal_idx + 1, :, :] = 0.
225 | self.dataset_info[scene_name][floor_idx][
226 | "sem_map"][goal_idx + 1, :, :] = 0.
227 |
228 | loc_found = False
229 | while not loc_found:
230 | pos = self._env.sim.sample_navigable_point()
231 | x = -pos[2]
232 | y = -pos[0]
233 | min_x, min_y = map_obj_origin / 100.0
234 | map_loc = int((-y - min_y) * 20.), int((-x - min_x) * 20.)
235 | if abs(pos[1] - floor_height) < args.floor_thr / 100.0 and \
236 | possible_starting_locs[map_loc[0], map_loc[1]] == 1:
237 | loc_found = True
238 |
239 | agent_state = self._env.sim.get_agent_state(0)
240 | rotation = agent_state.rotation
241 | rvec = quaternion.as_rotation_vector(rotation)
242 | rvec[1] = np.random.rand() * 2 * np.pi
243 | rot = quaternion.from_rotation_vector(rvec)
244 |
245 | self.gt_planner = planner
246 | self.starting_loc = map_loc
247 | self.object_boundary = object_boundary
248 | self.goal_idx = goal_idx
249 | self.goal_name = goal_name
250 | self.map_obj_origin = map_obj_origin
251 |
252 | self.starting_distance = self.gt_planner.fmm_dist[self.starting_loc] \
253 | / 20.0 + self.object_boundary
254 | self.prev_distance = self.starting_distance
255 |
256 | self._env.sim.set_agent_state(pos, rot)
257 |
258 | # The following two should match approximately
259 | # print(starting_loc)
260 | # print(self.sim_continuous_to_sim_map(self.get_sim_location()))
261 |
262 | obs = self._env.sim.get_observations_at(pos, rot)
263 |
264 | return obs
265 |
266 | def sim_map_to_sim_continuous(self, coords):
267 | """Converts ground-truth 2D Map coordinates to absolute Habitat
268 | simulator position and rotation.
269 | """
270 | agent_state = self._env.sim.get_agent_state(0)
271 | y, x = coords
272 | min_x, min_y = self.map_obj_origin / 100.0
273 |
274 | cont_x = x / 20. + min_x
275 | cont_y = y / 20. + min_y
276 | agent_state.position[0] = cont_y
277 | agent_state.position[2] = cont_x
278 |
279 | rotation = agent_state.rotation
280 | rvec = quaternion.as_rotation_vector(rotation)
281 |
282 | if self.args.train_single_eps:
283 | rvec[1] = 0.0
284 | else:
285 | rvec[1] = np.random.rand() * 2 * np.pi
286 | rot = quaternion.from_rotation_vector(rvec)
287 |
288 | return agent_state.position, rot
289 |
290 | def sim_continuous_to_sim_map(self, sim_loc):
291 | """Converts absolute Habitat simulator pose to ground-truth 2D Map
292 | coordinates.
293 | """
294 | x, y, o = sim_loc
295 | min_x, min_y = self.map_obj_origin / 100.0
296 | x, y = int((-x - min_x) * 20.), int((-y - min_y) * 20.)
297 |
298 | o = np.rad2deg(o) + 180.0
299 | return y, x, o
300 |
301 | def reset(self):
302 | """Resets the environment to a new episode.
303 |
304 | Returns:
305 | obs (ndarray): RGBD observations (4 x H x W)
306 | info (dict): contains timestep, pose, goal category and
307 | evaluation metric info
308 | """
309 | args = self.args
310 | new_scene = self.episode_no % args.num_train_episodes == 0
311 |
312 | self.episode_no += 1
313 |
314 | # Initializations
315 | self.timestep = 0
316 | self.stopped = False
317 | self.path_length = 1e-5
318 | self.trajectory_states = []
319 |
320 | if new_scene:
321 | obs = super().reset()
322 | self.scene_name = self.habitat_env.sim.config.SCENE
323 | print("Changing scene: {}/{}".format(self.rank, self.scene_name))
324 |
325 | self.scene_path = self.habitat_env.sim.config.SCENE
326 |
327 | if self.split == "val":
328 | obs = self.load_new_episode()
329 | else:
330 | obs = self.generate_new_episode()
331 |
332 | rgb = obs['rgb'].astype(np.uint8)
333 | depth = obs['depth']
334 | state = np.concatenate((rgb, depth), axis=2).transpose(2, 0, 1)
335 | self.last_sim_location = self.get_sim_location()
336 |
337 | # Set info
338 | self.info['time'] = self.timestep
339 | self.info['sensor_pose'] = [0., 0., 0.]
340 | self.info['goal_cat_id'] = self.goal_idx
341 | self.info['goal_name'] = self.goal_name
342 |
343 | return state, self.info
344 |
345 | def step(self, action):
346 | """Function to take an action in the environment.
347 |
348 | Args:
349 | action (dict):
350 | dict with following keys:
351 | 'action' (int): 0: stop, 1: forward, 2: left, 3: right
352 |
353 | Returns:
354 | obs (ndarray): RGBD observations (4 x H x W)
355 | reward (float): amount of reward returned after previous action
356 | done (bool): whether the episode has ended
357 | info (dict): contains timestep, pose, goal category and
358 | evaluation metric info
359 | """
360 | action = action["action"]
361 | if action == 0:
362 | self.stopped = True
363 | # Not sending stop to simulator, resetting manually
364 | action = 3
365 |
366 | obs, rew, done, _ = super().step(action)
367 |
368 | # Get pose change
369 | dx, dy, do = self.get_pose_change()
370 | self.info['sensor_pose'] = [dx, dy, do]
371 | self.path_length += pu.get_l2_distance(0, dx, 0, dy)
372 |
373 | spl, success, dist = 0., 0., 0.
374 | if done:
375 | spl, success, dist = self.get_metrics()
376 | self.info['distance_to_goal'] = dist
377 | self.info['spl'] = spl
378 | self.info['success'] = success
379 |
380 | rgb = obs['rgb'].astype(np.uint8)
381 | depth = obs['depth']
382 | state = np.concatenate((rgb, depth), axis=2).transpose(2, 0, 1)
383 |
384 | self.timestep += 1
385 | self.info['time'] = self.timestep
386 |
387 | return state, rew, done, self.info
388 |
389 | def get_reward_range(self):
390 | """This function is not used, Habitat-RLEnv requires this function"""
391 | return (0., 1.0)
392 |
393 | def get_reward(self, observations):
394 | curr_loc = self.sim_continuous_to_sim_map(self.get_sim_location())
395 | self.curr_distance = self.gt_planner.fmm_dist[curr_loc[0],
396 | curr_loc[1]] / 20.0
397 |
398 | reward = (self.prev_distance - self.curr_distance) * \
399 | self.args.reward_coeff
400 |
401 | self.prev_distance = self.curr_distance
402 | return reward
403 |
404 | def get_metrics(self):
405 | """This function computes evaluation metrics for the Object Goal task
406 |
407 | Returns:
408 | spl (float): Success weighted by Path Length
409 | (See https://arxiv.org/pdf/1807.06757.pdf)
410 | success (int): 0: Failure, 1: Successful
411 | dist (float): Distance to Success (DTS), distance of the agent
412 | from the success threshold boundary in meters.
413 | (See https://arxiv.org/pdf/2007.00643.pdf)
414 | """
415 | curr_loc = self.sim_continuous_to_sim_map(self.get_sim_location())
416 | dist = self.gt_planner.fmm_dist[curr_loc[0], curr_loc[1]] / 20.0
417 | if dist == 0.0:
418 | success = 1
419 | else:
420 | success = 0
421 | spl = min(success * self.starting_distance / self.path_length, 1)
422 | return spl, success, dist
423 |
424 | def get_done(self, observations):
425 | if self.info['time'] >= self.args.max_episode_length - 1:
426 | done = True
427 | elif self.stopped:
428 | done = True
429 | else:
430 | done = False
431 | return done
432 |
433 | def get_info(self, observations):
434 | """This function is not used, Habitat-RLEnv requires this function"""
435 | info = {}
436 | return info
437 |
438 | def get_spaces(self):
439 | """Returns observation and action spaces for the ObjectGoal task."""
440 | return self.observation_space, self.action_space
441 |
442 | def get_sim_location(self):
443 | """Returns x, y, o pose of the agent in the Habitat simulator."""
444 |
445 | agent_state = super().habitat_env.sim.get_agent_state(0)
446 | x = -agent_state.position[2]
447 | y = -agent_state.position[0]
448 | axis = quaternion.as_euler_angles(agent_state.rotation)[0]
449 | if (axis % (2 * np.pi)) < 0.1 or (axis %
450 | (2 * np.pi)) > 2 * np.pi - 0.1:
451 | o = quaternion.as_euler_angles(agent_state.rotation)[1]
452 | else:
453 | o = 2 * np.pi - quaternion.as_euler_angles(agent_state.rotation)[1]
454 | if o > np.pi:
455 | o -= 2 * np.pi
456 | return x, y, o
457 |
458 | def get_pose_change(self):
459 | """Returns dx, dy, do pose change of the agent relative to the last
460 | timestep."""
461 | curr_sim_pose = self.get_sim_location()
462 | dx, dy, do = pu.get_rel_pose_change(
463 | curr_sim_pose, self.last_sim_location)
464 | self.last_sim_location = curr_sim_pose
465 | return dx, dy, do
466 |
--------------------------------------------------------------------------------
/envs/habitat/utils/vector_env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | # This source code is licensed under the MIT license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from multiprocessing.connection import Connection
8 | from multiprocessing.context import BaseContext
9 | from queue import Queue
10 | from threading import Thread
11 | from typing import (
12 | Any,
13 | Callable,
14 | Dict,
15 | List,
16 | Optional,
17 | Sequence,
18 | Set,
19 | Tuple,
20 | Union,
21 | )
22 |
23 | import gym
24 | import numpy as np
25 | from gym.spaces.dict_space import Dict as SpaceDict
26 |
27 | import habitat
28 | from habitat.config import Config
29 | from habitat.core.env import Env, Observations, RLEnv
30 | from habitat.core.logging import logger
31 | from habitat.core.utils import tile_images
32 |
33 | try:
34 | # Use torch.multiprocessing if we can.
35 | # We have yet to find a reason to not use it and
36 | # you are required to use it when sending a torch.Tensor
37 | # between processes
38 | import torch.multiprocessing as mp
39 | except ImportError:
40 | import multiprocessing as mp
41 |
42 | STEP_COMMAND = "step"
43 | RESET_COMMAND = "reset"
44 | RENDER_COMMAND = "render"
45 | CLOSE_COMMAND = "close"
46 | OBSERVATION_SPACE_COMMAND = "observation_space"
47 | ACTION_SPACE_COMMAND = "action_space"
48 | CALL_COMMAND = "call"
49 | EPISODE_COMMAND = "current_episode"
50 | PLAN_ACT_AND_PREPROCESS = "plan_act_and_preprocess"
51 | COUNT_EPISODES_COMMAND = "count_episodes"
52 | EPISODE_OVER = "episode_over"
53 | GET_METRICS = "get_metrics"
54 |
55 |
56 | def _make_env_fn(
57 | config: Config, dataset: Optional[habitat.Dataset] = None, rank: int = 0
58 | ) -> Env:
59 | """Constructor for default habitat `env.Env`.
60 |
61 | :param config: configuration for environment.
62 | :param dataset: dataset for environment.
63 | :param rank: rank for setting seed of environment
64 | :return: `env.Env` / `env.RLEnv` object
65 | """
66 | habitat_env = Env(config=config, dataset=dataset)
67 | habitat_env.seed(config.SEED + rank)
68 | return habitat_env
69 |
70 |
71 | class VectorEnv:
72 | r"""Vectorized environment which creates multiple processes where each
73 | process runs its own environment. Main class for parallelization of
74 | training and evaluation.
75 |
76 |
77 | All the environments are synchronized on step and reset methods.
78 | """
79 |
80 | observation_spaces: List[SpaceDict]
81 | action_spaces: List[SpaceDict]
82 | _workers: List[Union[mp.Process, Thread]]
83 | _is_waiting: bool
84 | _num_envs: int
85 | _auto_reset_done: bool
86 | _mp_ctx: BaseContext
87 | _connection_read_fns: List[Callable[[], Any]]
88 | _connection_write_fns: List[Callable[[Any], None]]
89 |
90 | def __init__(
91 | self,
92 | make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn,
93 | env_fn_args: Sequence[Tuple] = None,
94 | auto_reset_done: bool = True,
95 | multiprocessing_start_method: str = "forkserver",
96 | ) -> None:
97 | """..
98 |
99 | :param make_env_fn: function which creates a single environment. An
100 | environment can be of type `env.Env` or `env.RLEnv`
101 | :param env_fn_args: tuple of tuple of args to pass to the
102 | `_make_env_fn`.
103 | :param auto_reset_done: automatically reset the environment when
104 | done. This functionality is provided for seamless training
105 | of vectorized environments.
106 | :param multiprocessing_start_method: the multiprocessing method used to
107 | spawn worker processes. Valid methods are
108 | :py:`{'spawn', 'forkserver', 'fork'}`; :py:`'forkserver'` is the
109 | recommended method as it works well with CUDA. If :py:`'fork'` is
110 | used, the subprocess must be started before any other GPU usage.
111 | """
112 | self._is_waiting = False
113 | self._is_closed = True
114 |
115 | assert (
116 | env_fn_args is not None and len(env_fn_args) > 0
117 | ), "number of environments to be created should be greater than 0"
118 |
119 | self._num_envs = len(env_fn_args)
120 |
121 | assert multiprocessing_start_method in self._valid_start_methods, (
122 | "multiprocessing_start_method must be one of {}. Got '{}'"
123 | ).format(self._valid_start_methods, multiprocessing_start_method)
124 | self._auto_reset_done = auto_reset_done
125 | self._mp_ctx = mp.get_context(multiprocessing_start_method)
126 | self._workers = []
127 | (
128 | self._connection_read_fns,
129 | self._connection_write_fns,
130 | ) = self._spawn_workers( # noqa
131 | env_fn_args, make_env_fn
132 | )
133 |
134 | self._is_closed = False
135 |
136 | for write_fn in self._connection_write_fns:
137 | write_fn((OBSERVATION_SPACE_COMMAND, None))
138 | self.observation_spaces = [
139 | read_fn() for read_fn in self._connection_read_fns
140 | ]
141 | for write_fn in self._connection_write_fns:
142 | write_fn((ACTION_SPACE_COMMAND, None))
143 | self.action_spaces = [
144 | read_fn() for read_fn in self._connection_read_fns
145 | ]
146 | self.observation_space = self.observation_spaces[0]
147 | self.action_space = self.action_spaces[0]
148 | self._paused = []
149 |
150 | @property
151 | def num_envs(self):
152 | r"""number of individual environments.
153 | """
154 | return self._num_envs - len(self._paused)
155 |
156 | @staticmethod
157 | def _worker_env(
158 | connection_read_fn: Callable,
159 | connection_write_fn: Callable,
160 | env_fn: Callable,
161 | env_fn_args: Tuple[Any],
162 | auto_reset_done: bool,
163 | child_pipe: Optional[Connection] = None,
164 | parent_pipe: Optional[Connection] = None,
165 | ) -> None:
166 | r"""process worker for creating and interacting with the environment.
167 | """
168 | env = env_fn(*env_fn_args)
169 | if parent_pipe is not None:
170 | parent_pipe.close()
171 | try:
172 | command, data = connection_read_fn()
173 | while command != CLOSE_COMMAND:
174 | if command == STEP_COMMAND:
175 | # different step methods for habitat.RLEnv and habitat.Env
176 | if isinstance(env, habitat.RLEnv) or isinstance(
177 | env, gym.Env
178 | ):
179 | # habitat.RLEnv
180 | observations, reward, done, info = env.step(**data)
181 | if auto_reset_done and done:
182 | observations, info = env.reset()
183 | connection_write_fn((observations, reward, done, info))
184 | elif isinstance(env, habitat.Env):
185 | # habitat.Env
186 | observations = env.step(**data)
187 | if auto_reset_done and env.episode_over:
188 | observations = env.reset()
189 | connection_write_fn(observations)
190 | else:
191 | raise NotImplementedError
192 |
193 | elif command == RESET_COMMAND:
194 | observations = env.reset()
195 | connection_write_fn(observations)
196 |
197 | elif command == RENDER_COMMAND:
198 | connection_write_fn(env.render(*data[0], **data[1]))
199 |
200 | elif (
201 | command == OBSERVATION_SPACE_COMMAND
202 | or command == ACTION_SPACE_COMMAND
203 | ):
204 | if isinstance(command, str):
205 | connection_write_fn(getattr(env, command))
206 |
207 | elif command == CALL_COMMAND:
208 | function_name, function_args = data
209 | if function_args is None or len(function_args) == 0:
210 | result = getattr(env, function_name)()
211 | else:
212 | result = getattr(env, function_name)(**function_args)
213 | connection_write_fn(result)
214 |
215 | # TODO: update CALL_COMMAND for getting attribute like this
216 | elif command == EPISODE_COMMAND:
217 | connection_write_fn(env.current_episode)
218 |
219 | elif command == PLAN_ACT_AND_PREPROCESS:
220 | observations, reward, done, info = \
221 | env.plan_act_and_preprocess(data)
222 | if auto_reset_done and done:
223 | observations, info = env.reset()
224 | connection_write_fn((observations, reward, done, info))
225 |
226 | elif command == COUNT_EPISODES_COMMAND:
227 | connection_write_fn(len(env.episodes))
228 |
229 | elif command == EPISODE_OVER:
230 | connection_write_fn(env.episode_over)
231 |
232 | elif command == GET_METRICS:
233 | result = env.get_metrics()
234 | connection_write_fn(result)
235 |
236 | else:
237 | raise NotImplementedError
238 |
239 | command, data = connection_read_fn()
240 |
241 | if child_pipe is not None:
242 | child_pipe.close()
243 | except KeyboardInterrupt:
244 | logger.info("Worker KeyboardInterrupt")
245 | finally:
246 | env.close()
247 |
248 | def _spawn_workers(
249 | self,
250 | env_fn_args: Sequence[Tuple],
251 | make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn,
252 | ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]:
253 | parent_connections, worker_connections = zip(
254 | *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_envs)]
255 | )
256 | self._workers = []
257 | for worker_conn, parent_conn, env_args in zip(
258 | worker_connections, parent_connections, env_fn_args
259 | ):
260 | ps = self._mp_ctx.Process(
261 | target=self._worker_env,
262 | args=(
263 | worker_conn.recv,
264 | worker_conn.send,
265 | make_env_fn,
266 | env_args,
267 | self._auto_reset_done,
268 | worker_conn,
269 | parent_conn,
270 | ),
271 | )
272 | self._workers.append(ps)
273 | ps.daemon = True
274 | ps.start()
275 | worker_conn.close()
276 | return (
277 | [p.recv for p in parent_connections],
278 | [p.send for p in parent_connections],
279 | )
280 |
281 | def current_episodes(self):
282 | self._is_waiting = True
283 | for write_fn in self._connection_write_fns:
284 | write_fn((EPISODE_COMMAND, None))
285 | results = []
286 | for read_fn in self._connection_read_fns:
287 | results.append(read_fn())
288 | self._is_waiting = False
289 | return results
290 |
291 | def count_episodes(self):
292 | self._is_waiting = True
293 | for write_fn in self._connection_write_fns:
294 | write_fn((COUNT_EPISODES_COMMAND, None))
295 | results = []
296 | for read_fn in self._connection_read_fns:
297 | results.append(read_fn())
298 | self._is_waiting = False
299 | return results
300 |
301 | def episode_over(self):
302 | self._is_waiting = True
303 | for write_fn in self._connection_write_fns:
304 | write_fn((EPISODE_OVER, None))
305 | results = []
306 | for read_fn in self._connection_read_fns:
307 | results.append(read_fn())
308 | self._is_waiting = False
309 | return results
310 |
311 | def get_metrics(self):
312 | self._is_waiting = True
313 | for write_fn in self._connection_write_fns:
314 | write_fn((GET_METRICS, None))
315 | results = []
316 | for read_fn in self._connection_read_fns:
317 | results.append(read_fn())
318 | self._is_waiting = False
319 | return results
320 |
321 | def reset(self):
322 | r"""Reset all the vectorized environments
323 |
324 | :return: list of outputs from the reset method of envs.
325 | """
326 | self._is_waiting = True
327 | for write_fn in self._connection_write_fns:
328 | write_fn((RESET_COMMAND, None))
329 | results = []
330 | for read_fn in self._connection_read_fns:
331 | results.append(read_fn())
332 | obs, infos = zip(*results)
333 |
334 | self._is_waiting = False
335 | return np.stack(obs), infos
336 |
337 | def reset_at(self, index_env: int):
338 | r"""Reset in the index_env environment in the vector.
339 |
340 | :param index_env: index of the environment to be reset
341 | :return: list containing the output of reset method of indexed env.
342 | """
343 | self._is_waiting = True
344 | self._connection_write_fns[index_env]((RESET_COMMAND, None))
345 | results = [self._connection_read_fns[index_env]()]
346 | self._is_waiting = False
347 | return results
348 |
349 | def step_at(self, index_env: int, action: Dict[str, Any]):
350 | r"""Step in the index_env environment in the vector.
351 |
352 | :param index_env: index of the environment to be stepped into
353 | :param action: action to be taken
354 | :return: list containing the output of step method of indexed env.
355 | """
356 | self._is_waiting = True
357 | self._connection_write_fns[index_env]((STEP_COMMAND, action))
358 | results = [self._connection_read_fns[index_env]()]
359 | self._is_waiting = False
360 | return results
361 |
362 | def step_async(self, data: List[Union[int, str, Dict[str, Any]]]) -> None:
363 | r"""Asynchronously step in the environments.
364 |
365 | :param data: list of size _num_envs containing keyword arguments to
366 | pass to `step` method for each Environment. For example,
367 | :py:`[{"action": "TURN_LEFT", "action_args": {...}}, ...]`.
368 | """
369 | # Backward compatibility
370 | if isinstance(data[0], (int, np.integer, str)):
371 | data = [{"action": {"action": action}} for action in data]
372 |
373 | self._is_waiting = True
374 | for write_fn, args in zip(self._connection_write_fns, data):
375 | write_fn((STEP_COMMAND, args))
376 |
377 | def step_wait(self) -> List[Observations]:
378 | r"""Wait until all the asynchronized environments have synchronized.
379 | """
380 | results = []
381 | for read_fn in self._connection_read_fns:
382 | results.append(read_fn())
383 | self._is_waiting = False
384 | obs, rews, dones, infos = zip(*results)
385 | return np.stack(obs), np.stack(rews), np.stack(dones), infos
386 |
387 | def step(self, data: List[Union[int, str, Dict[str, Any]]]) -> List[Any]:
388 | r"""Perform actions in the vectorized environments.
389 |
390 | :param data: list of size _num_envs containing keyword arguments to
391 | pass to `step` method for each Environment. For example,
392 | :py:`[{"action": "TURN_LEFT", "action_args": {...}}, ...]`.
393 | :return: list of outputs from the step method of envs.
394 | """
395 | self.step_async(data)
396 | return self.step_wait()
397 |
398 | def close(self) -> None:
399 | if self._is_closed:
400 | return
401 |
402 | if self._is_waiting:
403 | for read_fn in self._connection_read_fns:
404 | read_fn()
405 |
406 | for write_fn in self._connection_write_fns:
407 | write_fn((CLOSE_COMMAND, None))
408 |
409 | for _, _, write_fn, _ in self._paused:
410 | write_fn((CLOSE_COMMAND, None))
411 |
412 | for process in self._workers:
413 | process.join()
414 |
415 | for _, _, _, process in self._paused:
416 | process.join()
417 |
418 | self._is_closed = True
419 |
420 | def pause_at(self, index: int) -> None:
421 | r"""Pauses computation on this env without destroying the env.
422 |
423 | :param index: which env to pause. All indexes after this one will be
424 | shifted down by one.
425 |
426 | This is useful to avoid stepping all environments when only some
427 | are active, for example during the last episodes of an
428 | evaluation run.
429 | """
430 | if self._is_waiting:
431 | for read_fn in self._connection_read_fns:
432 | read_fn()
433 | read_fn = self._connection_read_fns.pop(index)
434 | write_fn = self._connection_write_fns.pop(index)
435 | worker = self._workers.pop(index)
436 | self._paused.append((index, read_fn, write_fn, worker))
437 |
438 | def resume_all(self) -> None:
439 | r"""Resumes any paused envs.
440 | """
441 | for index, read_fn, write_fn, worker in reversed(self._paused):
442 | self._connection_read_fns.insert(index, read_fn)
443 | self._connection_write_fns.insert(index, write_fn)
444 | self._workers.insert(index, worker)
445 | self._paused = []
446 |
447 | def call_at(
448 | self,
449 | index: int,
450 | function_name: str,
451 | function_args: Optional[Dict[str, Any]] = None,
452 | ) -> Any:
453 | r"""Calls a function (which is passed by name) on the selected env and
454 | returns the result.
455 |
456 | :param index: which env to call the function on.
457 | :param function_name: the name of the function to call on the env.
458 | :param function_args: optional function args.
459 | :return: result of calling the function.
460 | """
461 | self._is_waiting = True
462 | self._connection_write_fns[index](
463 | (CALL_COMMAND, (function_name, function_args))
464 | )
465 | result = self._connection_read_fns[index]()
466 | self._is_waiting = False
467 | return result
468 |
469 | def call(
470 | self,
471 | function_names: List[str],
472 | function_args_list: Optional[List[Any]] = None,
473 | ) -> List[Any]:
474 | r"""Calls a list of functions (which are passed by name) on the
475 | corresponding env (by index).
476 |
477 | :param function_names: the name of the functions to call on the envs.
478 | :param function_args_list: list of function args for each function. If
479 | provided, :py:`len(function_args_list)` should be as long as
480 | :py:`len(function_names)`.
481 | :return: list of results of calling the functions.
482 | """
483 | self._is_waiting = True
484 | if function_args_list is None:
485 | function_args_list = [None] * len(function_names)
486 | assert len(function_names) == len(function_args_list)
487 | func_args = zip(function_names, function_args_list)
488 | for write_fn, func_args_on in zip(
489 | self._connection_write_fns, func_args
490 | ):
491 | write_fn((CALL_COMMAND, func_args_on))
492 | results = []
493 | for read_fn in self._connection_read_fns:
494 | results.append(read_fn())
495 | self._is_waiting = False
496 | return results
497 |
498 | def render(
499 | self, mode: str = "human", *args, **kwargs
500 | ) -> Union[np.ndarray, None]:
501 | r"""Render observations from all environments in a tiled image.
502 | """
503 | for write_fn in self._connection_write_fns:
504 | write_fn((RENDER_COMMAND, (args, {"mode": "rgb", **kwargs})))
505 | images = [read_fn() for read_fn in self._connection_read_fns]
506 | tile = tile_images(images)
507 | if mode == "human":
508 | from habitat.core.utils import try_cv2_import
509 |
510 | cv2 = try_cv2_import()
511 |
512 | cv2.imshow("vecenv", tile[:, :, ::-1])
513 | cv2.waitKey(1)
514 | return None
515 | elif mode == "rgb_array":
516 | return tile
517 | else:
518 | raise NotImplementedError
519 |
520 | def plan_act_and_preprocess(self, inputs):
521 | self._assert_not_closed()
522 | self._is_waiting = True
523 | for e, write_fn in enumerate(self._connection_write_fns):
524 | write_fn((PLAN_ACT_AND_PREPROCESS, inputs[e]))
525 | results = []
526 | for read_fn in self._connection_read_fns:
527 | results.append(read_fn())
528 | obs, rews, dones, infos = zip(*results)
529 | self._is_waiting = False
530 | return np.stack(obs), np.stack(rews), np.stack(dones), infos
531 |
532 | def _assert_not_closed(self):
533 | assert not self._is_closed, "Trying to operate on a VectorEnv after calling close()"
534 |
535 | @property
536 | def _valid_start_methods(self) -> Set[str]:
537 | return {"forkserver", "spawn", "fork"}
538 |
539 | def __del__(self):
540 | self.close()
541 |
542 | def __enter__(self):
543 | return self
544 |
545 | def __exit__(self, exc_type, exc_val, exc_tb):
546 | self.close()
547 |
548 |
549 | class ThreadedVectorEnv(VectorEnv):
550 | r"""Provides same functionality as `VectorEnv`, the only difference is it
551 | runs in a multi-thread setup inside a single process.
552 |
553 | `VectorEnv` runs in a multi-proc setup. This makes it much easier to debug
554 | when using `VectorEnv` because you can actually put break points in the
555 | environment methods. It should not be used for best performance.
556 | """
557 |
558 | def _spawn_workers(
559 | self,
560 | env_fn_args: Sequence[Tuple],
561 | make_env_fn: Callable[..., Env] = _make_env_fn,
562 | ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]:
563 | parent_read_queues, parent_write_queues = zip(
564 | *[(Queue(), Queue()) for _ in range(self._num_envs)]
565 | )
566 | self._workers = []
567 | for parent_read_queue, parent_write_queue, env_args in zip(
568 | parent_read_queues, parent_write_queues, env_fn_args
569 | ):
570 | thread = Thread(
571 | target=self._worker_env,
572 | args=(
573 | parent_write_queue.get,
574 | parent_read_queue.put,
575 | make_env_fn,
576 | env_args,
577 | self._auto_reset_done,
578 | ),
579 | )
580 | self._workers.append(thread)
581 | thread.daemon = True
582 | thread.start()
583 | return (
584 | [q.get for q in parent_read_queues],
585 | [q.put for q in parent_write_queues],
586 | )
587 |
--------------------------------------------------------------------------------
/envs/utils/depth_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Utilities for processing depth images.
17 | """
18 | from argparse import Namespace
19 |
20 | import itertools
21 | import numpy as np
22 | import torch
23 |
24 | import envs.utils.rotation_utils as ru
25 |
26 |
27 | def get_camera_matrix(width, height, fov):
28 | """Returns a camera matrix from image size and fov."""
29 | xc = (width - 1.) / 2.
30 | zc = (height - 1.) / 2.
31 | f = (width / 2.) / np.tan(np.deg2rad(fov / 2.))
32 | camera_matrix = {'xc': xc, 'zc': zc, 'f': f}
33 | camera_matrix = Namespace(**camera_matrix)
34 | return camera_matrix
35 |
36 |
37 | def get_point_cloud_from_z(Y, camera_matrix, scale=1):
38 | """Projects the depth image Y into a 3D point cloud.
39 | Inputs:
40 | Y is ...xHxW
41 | camera_matrix
42 | Outputs:
43 | X is positive going right
44 | Y is positive into the image
45 | Z is positive up in the image
46 | XYZ is ...xHxWx3
47 | """
48 | x, z = np.meshgrid(np.arange(Y.shape[-1]),
49 | np.arange(Y.shape[-2] - 1, -1, -1))
50 | for _ in range(Y.ndim - 2):
51 | x = np.expand_dims(x, axis=0)
52 | z = np.expand_dims(z, axis=0)
53 |     X = (x[..., ::scale, ::scale] - camera_matrix.xc) * \
54 |         Y[..., ::scale, ::scale] / camera_matrix.f
55 |     Z = (z[..., ::scale, ::scale] - camera_matrix.zc) * \
56 |         Y[..., ::scale, ::scale] / camera_matrix.f
57 |     XYZ = np.concatenate((X[..., np.newaxis],
58 |                           Y[..., ::scale, ::scale][..., np.newaxis],
59 |                           Z[..., np.newaxis]), axis=X.ndim)
60 | return XYZ
61 |
62 |
63 | def transform_camera_view(XYZ, sensor_height, camera_elevation_degree):
64 | """
65 | Transforms the point cloud into geocentric frame to account for
66 | camera elevation and angle
67 | Input:
68 | XYZ : ...x3
69 | sensor_height : height of the sensor
70 | camera_elevation_degree : camera elevation to rectify.
71 | Output:
72 | XYZ : ...x3
73 | """
74 | R = ru.get_r_matrix(
75 | [1., 0., 0.], angle=np.deg2rad(camera_elevation_degree))
76 | XYZ = np.matmul(XYZ.reshape(-1, 3), R.T).reshape(XYZ.shape)
77 | XYZ[..., 2] = XYZ[..., 2] + sensor_height
78 | return XYZ
79 |
80 |
81 | def transform_pose(XYZ, current_pose):
82 | """
83 | Transforms the point cloud into geocentric frame to account for
84 | camera position
85 | Input:
86 | XYZ : ...x3
87 | current_pose : camera position (x, y, theta (radians))
88 | Output:
89 | XYZ : ...x3
90 | """
91 | R = ru.get_r_matrix([0., 0., 1.], angle=current_pose[2] - np.pi / 2.)
92 | XYZ = np.matmul(XYZ.reshape(-1, 3), R.T).reshape(XYZ.shape)
93 |     XYZ[..., 0] = XYZ[..., 0] + current_pose[0]
94 |     XYZ[..., 1] = XYZ[..., 1] + current_pose[1]
95 | return XYZ
96 |
97 |
98 | def bin_points(XYZ_cms, map_size, z_bins, xy_resolution):
99 | """Bins points into xy-z bins
100 | XYZ_cms is ... x H x W x3
101 | Outputs is ... x map_size x map_size x (len(z_bins)+1)
102 | """
103 | sh = XYZ_cms.shape
104 | XYZ_cms = XYZ_cms.reshape([-1, sh[-3], sh[-2], sh[-1]])
105 | n_z_bins = len(z_bins) + 1
106 | counts = []
107 | for XYZ_cm in XYZ_cms:
108 | isnotnan = np.logical_not(np.isnan(XYZ_cm[:, :, 0]))
109 | X_bin = np.round(XYZ_cm[:, :, 0] / xy_resolution).astype(np.int32)
110 | Y_bin = np.round(XYZ_cm[:, :, 1] / xy_resolution).astype(np.int32)
111 | Z_bin = np.digitize(XYZ_cm[:, :, 2], bins=z_bins).astype(np.int32)
112 |
113 | isvalid = np.array([X_bin >= 0, X_bin < map_size, Y_bin >= 0,
114 | Y_bin < map_size,
115 | Z_bin >= 0, Z_bin < n_z_bins, isnotnan])
116 | isvalid = np.all(isvalid, axis=0)
117 |
118 | ind = (Y_bin * map_size + X_bin) * n_z_bins + Z_bin
119 | ind[np.logical_not(isvalid)] = 0
120 |         count = np.bincount(ind.ravel(), isvalid.ravel().astype(np.int32),
121 |                             minlength=map_size * map_size * n_z_bins)
122 |         counts.append(np.reshape(count, [map_size, map_size, n_z_bins]))
123 |
124 |     counts = np.stack(counts).reshape(list(sh[:-3]) + [map_size, map_size, n_z_bins])
125 |
126 | return counts
127 |
128 |
129 | def get_point_cloud_from_z_t(Y_t, camera_matrix, device, scale=1):
130 |     """Projects the batched depth image Y_t into a 3D point cloud.
131 |     Inputs:
132 |         Y_t is BxHxW
133 | camera_matrix
134 | Outputs:
135 | X is positive going right
136 | Y is positive into the image
137 | Z is positive up in the image
138 | XYZ is ...xHxWx3
139 | """
140 | grid_x, grid_z = torch.meshgrid(torch.arange(Y_t.shape[-1]),
141 | torch.arange(Y_t.shape[-2] - 1, -1, -1))
142 | grid_x = grid_x.transpose(1, 0).to(device)
143 | grid_z = grid_z.transpose(1, 0).to(device)
144 | grid_x = grid_x.unsqueeze(0).expand(Y_t.size())
145 | grid_z = grid_z.unsqueeze(0).expand(Y_t.size())
146 |
147 | X_t = (grid_x[:, ::scale, ::scale] - camera_matrix.xc) * \
148 | Y_t[:, ::scale, ::scale] / camera_matrix.f
149 | Z_t = (grid_z[:, ::scale, ::scale] - camera_matrix.zc) * \
150 | Y_t[:, ::scale, ::scale] / camera_matrix.f
151 |
152 | XYZ = torch.stack(
153 | (X_t, Y_t[:, ::scale, ::scale], Z_t), dim=len(Y_t.size()))
154 |
155 | return XYZ
156 |
157 |
158 | def transform_camera_view_t(
159 | XYZ, sensor_height, camera_elevation_degree, device):
160 | """
161 | Transforms the point cloud into geocentric frame to account for
162 | camera elevation and angle
163 | Input:
164 | XYZ : ...x3
165 | sensor_height : height of the sensor
166 | camera_elevation_degree : camera elevation to rectify.
167 | Output:
168 | XYZ : ...x3
169 | """
170 | R = ru.get_r_matrix(
171 | [1., 0., 0.], angle=np.deg2rad(camera_elevation_degree))
172 | XYZ = torch.matmul(XYZ.reshape(-1, 3),
173 | torch.from_numpy(R).float().transpose(1, 0).to(device)
174 | ).reshape(XYZ.shape)
175 | XYZ[..., 2] = XYZ[..., 2] + sensor_height
176 | return XYZ
177 |
178 |
179 | def transform_pose_t(XYZ, current_pose, device):
180 | """
181 | Transforms the point cloud into geocentric frame to account for
182 | camera position
183 | Input:
184 | XYZ : ...x3
185 | current_pose : camera position (x, y, theta (radians))
186 | Output:
187 | XYZ : ...x3
188 | """
189 | R = ru.get_r_matrix([0., 0., 1.], angle=current_pose[2] - np.pi / 2.)
190 | XYZ = torch.matmul(XYZ.reshape(-1, 3),
191 | torch.from_numpy(R).float().transpose(1, 0).to(device)
192 | ).reshape(XYZ.shape)
193 | XYZ[..., 0] += current_pose[0]
194 | XYZ[..., 1] += current_pose[1]
195 | return XYZ
196 |
197 |
198 | def splat_feat_nd(init_grid, feat, coords):
199 | """
200 | Args:
201 | init_grid: B X nF X W X H X D X ..
202 | feat: B X nF X nPt
203 | coords: B X nDims X nPt in [-1, 1]
204 | Returns:
205 | grid: B X nF X W X H X D X ..
206 | """
207 | wts_dim = []
208 | pos_dim = []
209 | grid_dims = init_grid.shape[2:]
210 |
211 | B = init_grid.shape[0]
212 | F = init_grid.shape[1]
213 |
214 | n_dims = len(grid_dims)
215 |
216 | grid_flat = init_grid.view(B, F, -1)
217 |
218 | for d in range(n_dims):
219 | pos = coords[:, [d], :] * grid_dims[d] / 2 + grid_dims[d] / 2
220 | pos_d = []
221 | wts_d = []
222 |
223 | for ix in [0, 1]:
224 | pos_ix = torch.floor(pos) + ix
225 | safe_ix = (pos_ix > 0) & (pos_ix < grid_dims[d])
226 | safe_ix = safe_ix.type(pos.dtype)
227 |
228 | wts_ix = 1 - torch.abs(pos - pos_ix)
229 |
230 | wts_ix = wts_ix * safe_ix
231 | pos_ix = pos_ix * safe_ix
232 |
233 | pos_d.append(pos_ix)
234 | wts_d.append(wts_ix)
235 |
236 | pos_dim.append(pos_d)
237 | wts_dim.append(wts_d)
238 |
239 | l_ix = [[0, 1] for d in range(n_dims)]
240 |
241 | for ix_d in itertools.product(*l_ix):
242 | wts = torch.ones_like(wts_dim[0][0])
243 | index = torch.zeros_like(wts_dim[0][0])
244 | for d in range(n_dims):
245 | index = index * grid_dims[d] + pos_dim[d][ix_d[d]]
246 | wts = wts * wts_dim[d][ix_d[d]]
247 |
248 | index = index.long()
249 | grid_flat.scatter_add_(2, index.expand(-1, F, -1), feat * wts)
250 | grid_flat = torch.round(grid_flat)
251 |
252 | return grid_flat.view(init_grid.shape)
253 |
--------------------------------------------------------------------------------
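A minimal usage sketch for depth_utils.py above, assuming the repo root is on PYTHONPATH; the camera and map numbers are illustrative only:

    import numpy as np

    import envs.utils.depth_utils as du

    # 79-degree HFOV camera at 640x480 (values illustrative only).
    cam = du.get_camera_matrix(width=640, height=480, fov=79.0)

    # A flat wall 100 cm in front of the camera.
    depth = np.full((480, 640), 100.0, dtype=np.float32)
    pc = du.get_point_cloud_from_z(depth, cam, scale=1)  # (480, 640, 3)

    # Account for a camera mounted 88 cm up, with no tilt.
    pc = du.transform_camera_view(pc, sensor_height=88.0,
                                  camera_elevation_degree=0.0)

    # Bin into a 240x240 top-down grid (5 cm cells) with an obstacle band
    # between z = 25 cm and z = 150 cm.
    grid = du.bin_points(pc, map_size=240, z_bins=[25.0, 150.0],
                         xy_resolution=5.0)
    print(grid.shape)  # (240, 240, 3): below / inside / above the z band

The numpy path above is what `MapBuilder` uses; `splat_feat_nd` is the differentiable counterpart that `Semantic_Mapping` in model.py calls on the torch tensors.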
/envs/utils/fmm_planner.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import skfmm
4 | import skimage
5 | from numpy import ma
6 |
7 |
8 | def get_mask(sx, sy, scale, step_size):
9 | size = int(step_size // scale) * 2 + 1
10 | mask = np.zeros((size, size))
11 | for i in range(size):
12 | for j in range(size):
13 | if ((i + 0.5) - (size // 2 + sx)) ** 2 + \
14 | ((j + 0.5) - (size // 2 + sy)) ** 2 <= \
15 | step_size ** 2 \
16 | and ((i + 0.5) - (size // 2 + sx)) ** 2 + \
17 | ((j + 0.5) - (size // 2 + sy)) ** 2 > \
18 | (step_size - 1) ** 2:
19 | mask[i, j] = 1
20 |
21 | mask[size // 2, size // 2] = 1
22 | return mask
23 |
24 |
25 | def get_dist(sx, sy, scale, step_size):
26 | size = int(step_size // scale) * 2 + 1
27 | mask = np.zeros((size, size)) + 1e-10
28 | for i in range(size):
29 | for j in range(size):
30 | if ((i + 0.5) - (size // 2 + sx)) ** 2 + \
31 | ((j + 0.5) - (size // 2 + sy)) ** 2 <= \
32 | step_size ** 2:
33 | mask[i, j] = max(5,
34 | (((i + 0.5) - (size // 2 + sx)) ** 2 +
35 | ((j + 0.5) - (size // 2 + sy)) ** 2) ** 0.5)
36 | return mask
37 |
38 |
39 | class FMMPlanner():
40 | def __init__(self, traversible, scale=1, step_size=5):
41 | self.scale = scale
42 | self.step_size = step_size
43 | if scale != 1.:
44 | self.traversible = cv2.resize(traversible,
45 | (traversible.shape[1] // scale,
46 | traversible.shape[0] // scale),
47 | interpolation=cv2.INTER_NEAREST)
48 | self.traversible = np.rint(self.traversible)
49 | else:
50 | self.traversible = traversible
51 |
52 | self.du = int(self.step_size / (self.scale * 1.))
53 | self.fmm_dist = None
54 |
55 | def set_goal(self, goal, auto_improve=False):
56 | traversible_ma = ma.masked_values(self.traversible * 1, 0)
57 | goal_x, goal_y = int(goal[0] / (self.scale * 1.)), \
58 | int(goal[1] / (self.scale * 1.))
59 |
60 | if self.traversible[goal_x, goal_y] == 0. and auto_improve:
61 | goal_x, goal_y = self._find_nearest_goal([goal_x, goal_y])
62 |
63 | traversible_ma[goal_x, goal_y] = 0
64 | dd = skfmm.distance(traversible_ma, dx=1)
65 | dd = ma.filled(dd, np.max(dd) + 1)
66 | self.fmm_dist = dd
67 | return
68 |
69 | def set_multi_goal(self, goal_map):
70 | traversible_ma = ma.masked_values(self.traversible * 1, 0)
71 | traversible_ma[goal_map == 1] = 0
72 | dd = skfmm.distance(traversible_ma, dx=1)
73 | dd = ma.filled(dd, np.max(dd) + 1)
74 | self.fmm_dist = dd
75 | return
76 |
77 | def get_short_term_goal(self, state):
78 | scale = self.scale * 1.
79 | state = [x / scale for x in state]
80 | dx, dy = state[0] - int(state[0]), state[1] - int(state[1])
81 | mask = get_mask(dx, dy, scale, self.step_size)
82 | dist_mask = get_dist(dx, dy, scale, self.step_size)
83 |
84 | state = [int(x) for x in state]
85 |
86 | dist = np.pad(self.fmm_dist, self.du,
87 | 'constant', constant_values=self.fmm_dist.shape[0] ** 2)
88 | subset = dist[state[0]:state[0] + 2 * self.du + 1,
89 | state[1]:state[1] + 2 * self.du + 1]
90 |
91 | assert subset.shape[0] == 2 * self.du + 1 and \
92 | subset.shape[1] == 2 * self.du + 1, \
93 | "Planning error: unexpected subset shape {}".format(subset.shape)
94 |
95 | subset *= mask
96 | subset += (1 - mask) * self.fmm_dist.shape[0] ** 2
97 |
98 | if subset[self.du, self.du] < 0.25 * 100 / 5.: # 25cm
99 | stop = True
100 | else:
101 | stop = False
102 |
103 | subset -= subset[self.du, self.du]
104 | ratio1 = subset / dist_mask
105 | subset[ratio1 < -1.5] = 1
106 |
107 | (stg_x, stg_y) = np.unravel_index(np.argmin(subset), subset.shape)
108 |
109 | if subset[stg_x, stg_y] > -0.0001:
110 | replan = True
111 | else:
112 | replan = False
113 |
114 | return (stg_x + state[0] - self.du) * scale, \
115 | (stg_y + state[1] - self.du) * scale, replan, stop
116 |
117 | def _find_nearest_goal(self, goal):
118 | traversible = skimage.morphology.binary_dilation(
119 | np.zeros(self.traversible.shape),
120 | skimage.morphology.disk(2)) != True
121 | traversible = traversible * 1.
122 | planner = FMMPlanner(traversible)
123 | planner.set_goal(goal)
124 |
125 | mask = self.traversible
126 |
127 | dist_map = planner.fmm_dist * mask
128 | dist_map[dist_map == 0] = dist_map.max()
129 |
130 | goal = np.unravel_index(dist_map.argmin(), dist_map.shape)
131 |
132 | return goal
133 |
--------------------------------------------------------------------------------
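`FMMPlanner` can be exercised on a toy grid. A sketch, assuming scikit-fmm, scikit-image and OpenCV are installed:

    import numpy as np

    from envs.utils.fmm_planner import FMMPlanner

    # 1 = traversible, 0 = obstacle; a wall splits the map.
    traversible = np.ones((50, 50))
    traversible[20:30, 0:40] = 0.

    planner = FMMPlanner(traversible, scale=1, step_size=5)
    planner.set_goal((45, 45))

    # Geodesic (fast-marching) distance from any cell to the goal.
    print(planner.fmm_dist[5, 5])

    # Next waypoint from (5, 5), plus replan/stop flags.
    stg_x, stg_y, replan, stop = planner.get_short_term_goal([5., 5.])
    print(stg_x, stg_y, replan, stop)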
/envs/utils/map_builder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import envs.utils.depth_utils as du
3 |
4 |
5 | class MapBuilder(object):
6 | def __init__(self, params):
7 | self.params = params
8 | frame_width = params['frame_width']
9 | frame_height = params['frame_height']
10 | fov = params['fov']
11 | self.camera_matrix = du.get_camera_matrix(
12 | frame_width,
13 | frame_height,
14 | fov)
15 | self.vision_range = params['vision_range']
16 |
17 | self.map_size_cm = params['map_size_cm']
18 | self.resolution = params['resolution']
19 | agent_min_z = params['agent_min_z']
20 | agent_max_z = params['agent_max_z']
21 | self.z_bins = [agent_min_z, agent_max_z]
22 | self.du_scale = params['du_scale']
23 | self.visualize = params['visualize']
24 | self.obs_threshold = params['obs_threshold']
25 |
26 | self.map = np.zeros((self.map_size_cm // self.resolution,
27 | self.map_size_cm // self.resolution,
28 | len(self.z_bins) + 1), dtype=np.float32)
29 |
30 | self.agent_height = params['agent_height']
31 | self.agent_view_angle = params['agent_view_angle']
32 | return
33 |
34 | def update_map(self, depth, current_pose):
35 | with np.errstate(invalid="ignore"):
36 |             depth[depth > self.vision_range * self.resolution] = np.nan
37 | point_cloud = du.get_point_cloud_from_z(depth, self.camera_matrix,
38 | scale=self.du_scale)
39 |
40 | agent_view = du.transform_camera_view(point_cloud,
41 | self.agent_height,
42 | self.agent_view_angle)
43 |
44 | shift_loc = [self.vision_range * self.resolution // 2, 0, np.pi / 2.0]
45 | agent_view_centered = du.transform_pose(agent_view, shift_loc)
46 |
47 | agent_view_flat = du.bin_points(
48 | agent_view_centered,
49 | self.vision_range,
50 | self.z_bins,
51 | self.resolution)
52 |
53 | agent_view_cropped = agent_view_flat[:, :, 1]
54 | agent_view_cropped = agent_view_cropped / self.obs_threshold
55 | agent_view_cropped[agent_view_cropped >= 0.5] = 1.0
56 | agent_view_cropped[agent_view_cropped < 0.5] = 0.0
57 |
58 | agent_view_explored = agent_view_flat.sum(2)
59 | agent_view_explored[agent_view_explored > 0] = 1.0
60 |
61 | geocentric_pc = du.transform_pose(agent_view, current_pose)
62 |
63 | geocentric_flat = du.bin_points(
64 | geocentric_pc,
65 | self.map.shape[0],
66 | self.z_bins,
67 | self.resolution)
68 |
69 | self.map = self.map + geocentric_flat
70 |
71 | map_gt = self.map[:, :, 1] / self.obs_threshold
72 | map_gt[map_gt >= 0.5] = 1.0
73 | map_gt[map_gt < 0.5] = 0.0
74 |
75 | explored_gt = self.map.sum(2)
76 | explored_gt[explored_gt > 1] = 1.0
77 |
78 | return agent_view_cropped, map_gt, agent_view_explored, explored_gt
79 |
80 | def get_st_pose(self, current_loc):
81 | loc = [- (current_loc[0] / self.resolution
82 | - self.map_size_cm // (self.resolution * 2)) /
83 | (self.map_size_cm // (self.resolution * 2)),
84 | - (current_loc[1] / self.resolution
85 | - self.map_size_cm // (self.resolution * 2)) /
86 | (self.map_size_cm // (self.resolution * 2)),
87 | 90 - np.rad2deg(current_loc[2])]
88 | return loc
89 |
90 | def reset_map(self, map_size):
91 | self.map_size_cm = map_size
92 |
93 | self.map = np.zeros((self.map_size_cm // self.resolution,
94 | self.map_size_cm // self.resolution,
95 | len(self.z_bins) + 1), dtype=np.float32)
96 |
97 | def get_map(self):
98 | return self.map
99 |
--------------------------------------------------------------------------------
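A usage sketch for `MapBuilder` with illustrative parameter values (distances in cm, heading in radians):

    import numpy as np

    from envs.utils.map_builder import MapBuilder

    # Illustrative values only; the real ones come from the run configuration.
    params = {
        'frame_width': 640, 'frame_height': 480, 'fov': 79.0,
        'vision_range': 64,          # cells visible in front of the agent
        'map_size_cm': 2400, 'resolution': 5,
        'agent_min_z': 25, 'agent_max_z': 150,
        'du_scale': 1, 'visualize': False, 'obs_threshold': 1,
        'agent_height': 88, 'agent_view_angle': 0,
    }
    builder = MapBuilder(params)

    depth = np.full((480, 640), 100.0, dtype=np.float32)  # cm
    pose = (1200.0, 1200.0, 0.0)                          # x, y in cm, theta in rad
    fp_proj, map_gt, fp_explored, explored_gt = builder.update_map(depth, pose)
    print(map_gt.shape)  # (480, 480) == map_size_cm // resolution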
/envs/utils/pose.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def get_l2_distance(x1, x2, y1, y2):
5 | """
6 | Computes the L2 distance between two points.
7 | """
8 | return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
9 |
10 |
11 | def get_rel_pose_change(pos2, pos1):
12 | x1, y1, o1 = pos1
13 | x2, y2, o2 = pos2
14 |
15 | theta = np.arctan2(y2 - y1, x2 - x1) - o1
16 | dist = get_l2_distance(x1, x2, y1, y2)
17 | dx = dist * np.cos(theta)
18 | dy = dist * np.sin(theta)
19 | do = o2 - o1
20 |
21 | return dx, dy, do
22 |
23 |
24 | def get_new_pose(pose, rel_pose_change):
25 | x, y, o = pose
26 | dx, dy, do = rel_pose_change
27 |
28 | global_dx = dx * np.sin(np.deg2rad(o)) + dy * np.cos(np.deg2rad(o))
29 | global_dy = dx * np.cos(np.deg2rad(o)) - dy * np.sin(np.deg2rad(o))
30 | x += global_dy
31 | y += global_dx
32 | o += np.rad2deg(do)
33 | if o > 180.:
34 | o -= 360.
35 |
36 | return x, y, o
37 |
38 |
39 | def threshold_poses(coords, shape):
40 | coords[0] = min(max(0, coords[0]), shape[0] - 1)
41 | coords[1] = min(max(0, coords[1]), shape[1] - 1)
42 | return coords
43 |
--------------------------------------------------------------------------------
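A worked example of the two pose helpers above. Note the convention split in this codebase: the relative heading change `do` is in radians, while the absolute heading `o` inside `get_new_pose` is in degrees.

    import numpy as np

    from envs.utils.pose import get_rel_pose_change, get_new_pose

    pos1 = (0., 0., 0.)
    pos2 = (1., 1., np.pi / 2)

    dx, dy, do = get_rel_pose_change(pos2, pos1)
    print(dx, dy, do)  # 1.0 1.0 1.5707... (forward, lateral, heading in rad)

    # Re-integrating the change recovers pos2, with heading now in degrees.
    print(get_new_pose((0., 0., 0.), (dx, dy, do)))  # (1.0, 1.0, 90.0)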
/envs/utils/rotation_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Utilities for generating and applying rotation matrices.
17 | """
18 | import numpy as np
19 |
20 | ANGLE_EPS = 0.001
21 |
22 |
23 | def normalize(v):
24 | return v / np.linalg.norm(v)
25 |
26 |
27 | def get_r_matrix(ax_, angle):
28 | ax = normalize(ax_)
29 | if np.abs(angle) > ANGLE_EPS:
30 | S_hat = np.array(
31 | [[0.0, -ax[2], ax[1]], [ax[2], 0.0, -ax[0]], [-ax[1], ax[0], 0.0]],
32 | dtype=np.float32)
33 | R = np.eye(3) + np.sin(angle) * S_hat + \
34 | (1 - np.cos(angle)) * (np.linalg.matrix_power(S_hat, 2))
35 | else:
36 | R = np.eye(3)
37 | return R
38 |
39 |
40 | def r_between(v_from_, v_to_):
41 | v_from = normalize(v_from_)
42 | v_to = normalize(v_to_)
43 | ax = normalize(np.cross(v_from, v_to))
44 | angle = np.arccos(np.dot(v_from, v_to))
45 | return get_r_matrix(ax, angle)
46 |
47 |
48 | def rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to):
49 | inputs = [up_from, lookat_from, up_to, lookat_to]
50 | for i in range(4):
51 | inputs[i] = normalize(np.array(inputs[i]).reshape((-1,)))
52 | up_from, lookat_from, up_to, lookat_to = inputs
53 | r1 = r_between(lookat_from, lookat_to)
54 |
55 | new_x = np.dot(r1, np.array([1, 0, 0]).reshape((-1, 1))).reshape((-1))
56 | to_x = normalize(np.cross(lookat_to, up_to))
57 | angle = np.arccos(np.dot(new_x, to_x))
58 | if angle > ANGLE_EPS:
59 | if angle < np.pi - ANGLE_EPS:
60 | ax = normalize(np.cross(new_x, to_x))
61 | flip = np.dot(lookat_to, ax)
62 | if flip > 0:
63 | r2 = get_r_matrix(lookat_to, angle)
64 | elif flip < 0:
65 | r2 = get_r_matrix(lookat_to, -1. * angle)
66 | else:
67 | # Angle of rotation is too close to 180 degrees, direction of
68 | # rotation does not matter.
69 | r2 = get_r_matrix(lookat_to, angle)
70 | else:
71 | r2 = np.eye(3)
72 | return np.dot(r2, r1)
73 |
--------------------------------------------------------------------------------
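Two quick sanity checks for the Rodrigues-style helpers above:

    import numpy as np

    import envs.utils.rotation_utils as ru

    # Rotating the x-axis by 90 degrees about z lands on the y-axis.
    R = ru.get_r_matrix([0., 0., 1.], angle=np.pi / 2)
    print(np.round(R @ np.array([1., 0., 0.]), 6))   # [0. 1. 0.]

    # r_between returns the rotation taking one direction to another.
    R2 = ru.r_between([1., 0., 0.], [0., 0., 1.])
    print(np.round(R2 @ np.array([1., 0., 0.]), 6))  # [0. 0. 1.]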
/main.py:
--------------------------------------------------------------------------------
1 | from collections import deque, defaultdict
2 | import os
3 | import logging
4 | import time
5 | import json
6 | import gym
7 | import torch.nn as nn
8 | import torch
9 | import numpy as np
10 |
11 | from model import RL_Policy, Semantic_Mapping
12 | from utils.storage import GlobalRolloutStorage
13 | from envs import make_vec_envs
14 | from arguments import get_args
15 | import algo
16 |
17 | os.environ["OMP_NUM_THREADS"] = "1"
18 |
19 |
20 | def main():
21 | args = get_args()
22 |
23 | np.random.seed(args.seed)
24 | torch.manual_seed(args.seed)
25 |
26 | if args.cuda:
27 | torch.cuda.manual_seed(args.seed)
28 |
29 | # Setup Logging
30 | log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name)
31 | dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name)
32 |
33 | if not os.path.exists(log_dir):
34 | os.makedirs(log_dir)
35 | if not os.path.exists(dump_dir):
36 | os.makedirs(dump_dir)
37 |
38 | logging.basicConfig(
39 | filename=log_dir + 'train.log',
40 | level=logging.INFO)
41 | print("Dumping at {}".format(log_dir))
42 | print(args)
43 | logging.info(args)
44 |
45 | # Logging and loss variables
46 | num_scenes = args.num_processes
47 | num_episodes = int(args.num_eval_episodes)
48 | device = args.device = torch.device("cuda:0" if args.cuda else "cpu")
49 |
50 | g_masks = torch.ones(num_scenes).float().to(device)
51 |
52 | best_g_reward = -np.inf
53 |
54 | if args.eval:
55 | episode_success = []
56 | episode_spl = []
57 | episode_dist = []
58 | for _ in range(args.num_processes):
59 | episode_success.append(deque(maxlen=num_episodes))
60 | episode_spl.append(deque(maxlen=num_episodes))
61 | episode_dist.append(deque(maxlen=num_episodes))
62 |
63 | else:
64 | episode_success = deque(maxlen=1000)
65 | episode_spl = deque(maxlen=1000)
66 | episode_dist = deque(maxlen=1000)
67 |
68 | finished = np.zeros((args.num_processes))
69 | wait_env = np.zeros((args.num_processes))
70 |
71 | g_episode_rewards = deque(maxlen=1000)
72 |
73 | g_value_losses = deque(maxlen=1000)
74 | g_action_losses = deque(maxlen=1000)
75 | g_dist_entropies = deque(maxlen=1000)
76 |
77 | per_step_g_rewards = deque(maxlen=1000)
78 |
79 | g_process_rewards = np.zeros((num_scenes))
80 |
81 | # Starting environments
82 | torch.set_num_threads(1)
83 | envs = make_vec_envs(args)
84 | obs, infos = envs.reset()
85 |
86 | torch.set_grad_enabled(False)
87 |
88 | # Initialize map variables:
89 | # Full map consists of multiple channels containing the following:
90 | # 1. Obstacle Map
91 |     # 2. Explored Area
92 | # 3. Current Agent Location
93 | # 4. Past Agent Locations
94 | # 5,6,7,.. : Semantic Categories
95 | nc = args.num_sem_categories + 4 # num channels
96 |
97 | # Calculating full and local map sizes
98 | map_size = args.map_size_cm // args.map_resolution
99 | full_w, full_h = map_size, map_size
100 | local_w = int(full_w / args.global_downscaling)
101 | local_h = int(full_h / args.global_downscaling)
102 |
103 | # Initializing full and local map
104 | full_map = torch.zeros(num_scenes, nc, full_w, full_h).float().to(device)
105 | local_map = torch.zeros(num_scenes, nc, local_w,
106 | local_h).float().to(device)
107 |
108 | # Initial full and local pose
109 | full_pose = torch.zeros(num_scenes, 3).float().to(device)
110 | local_pose = torch.zeros(num_scenes, 3).float().to(device)
111 |
112 | # Origin of local map
113 | origins = np.zeros((num_scenes, 3))
114 |
115 | # Local Map Boundaries
116 | lmb = np.zeros((num_scenes, 4)).astype(int)
117 |
118 |     # Planner pose inputs have 7 dimensions
119 | # 1-3 store continuous global agent location
120 | # 4-7 store local map boundaries
121 | planner_pose_inputs = np.zeros((num_scenes, 7))
122 |
123 | def get_local_map_boundaries(agent_loc, local_sizes, full_sizes):
124 | loc_r, loc_c = agent_loc
125 | local_w, local_h = local_sizes
126 | full_w, full_h = full_sizes
127 |
128 | if args.global_downscaling > 1:
129 | gx1, gy1 = loc_r - local_w // 2, loc_c - local_h // 2
130 | gx2, gy2 = gx1 + local_w, gy1 + local_h
131 | if gx1 < 0:
132 | gx1, gx2 = 0, local_w
133 | if gx2 > full_w:
134 | gx1, gx2 = full_w - local_w, full_w
135 |
136 | if gy1 < 0:
137 | gy1, gy2 = 0, local_h
138 | if gy2 > full_h:
139 | gy1, gy2 = full_h - local_h, full_h
140 | else:
141 | gx1, gx2, gy1, gy2 = 0, full_w, 0, full_h
142 |
143 | return [gx1, gx2, gy1, gy2]
144 |
145 | def init_map_and_pose():
146 | full_map.fill_(0.)
147 | full_pose.fill_(0.)
148 | full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0
149 |
150 | locs = full_pose.cpu().numpy()
151 | planner_pose_inputs[:, :3] = locs
152 | for e in range(num_scenes):
153 | r, c = locs[e, 1], locs[e, 0]
154 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
155 | int(c * 100.0 / args.map_resolution)]
156 |
157 | full_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0
158 |
159 | lmb[e] = get_local_map_boundaries((loc_r, loc_c),
160 | (local_w, local_h),
161 | (full_w, full_h))
162 |
163 | planner_pose_inputs[e, 3:] = lmb[e]
164 | origins[e] = [lmb[e][2] * args.map_resolution / 100.0,
165 | lmb[e][0] * args.map_resolution / 100.0, 0.]
166 |
167 | for e in range(num_scenes):
168 | local_map[e] = full_map[e, :,
169 | lmb[e, 0]:lmb[e, 1],
170 | lmb[e, 2]:lmb[e, 3]]
171 | local_pose[e] = full_pose[e] - \
172 | torch.from_numpy(origins[e]).to(device).float()
173 |
174 | def init_map_and_pose_for_env(e):
175 | full_map[e].fill_(0.)
176 | full_pose[e].fill_(0.)
177 | full_pose[e, :2] = args.map_size_cm / 100.0 / 2.0
178 |
179 | locs = full_pose[e].cpu().numpy()
180 | planner_pose_inputs[e, :3] = locs
181 | r, c = locs[1], locs[0]
182 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
183 | int(c * 100.0 / args.map_resolution)]
184 |
185 | full_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0
186 |
187 | lmb[e] = get_local_map_boundaries((loc_r, loc_c),
188 | (local_w, local_h),
189 | (full_w, full_h))
190 |
191 | planner_pose_inputs[e, 3:] = lmb[e]
192 | origins[e] = [lmb[e][2] * args.map_resolution / 100.0,
193 | lmb[e][0] * args.map_resolution / 100.0, 0.]
194 |
195 | local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]]
196 | local_pose[e] = full_pose[e] - \
197 | torch.from_numpy(origins[e]).to(device).float()
198 |
199 | def update_intrinsic_rew(e):
200 | prev_explored_area = full_map[e, 1].sum(1).sum(0)
201 | full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \
202 | local_map[e]
203 | curr_explored_area = full_map[e, 1].sum(1).sum(0)
204 | intrinsic_rews[e] = curr_explored_area - prev_explored_area
205 | intrinsic_rews[e] *= (args.map_resolution / 100.)**2 # to m^2
206 |
207 | init_map_and_pose()
208 |
209 | # Global policy observation space
210 | ngc = 8 + args.num_sem_categories
211 |     es = 2  # extras size: orientation bin and goal category id
212 | g_observation_space = gym.spaces.Box(0, 1,
213 | (ngc,
214 | local_w,
215 | local_h), dtype='uint8')
216 |
217 | # Global policy action space
218 | g_action_space = gym.spaces.Box(low=0.0, high=0.99,
219 | shape=(2,), dtype=np.float32)
220 |
221 | # Global policy recurrent layer size
222 | g_hidden_size = args.global_hidden_size
223 |
224 | # Semantic Mapping
225 | sem_map_module = Semantic_Mapping(args).to(device)
226 | sem_map_module.eval()
227 |
228 | # Global policy
229 | g_policy = RL_Policy(g_observation_space.shape, g_action_space,
230 | model_type=1,
231 | base_kwargs={'recurrent': args.use_recurrent_global,
232 | 'hidden_size': g_hidden_size,
233 | 'num_sem_categories': ngc - 8
234 | }).to(device)
235 | g_agent = algo.PPO(g_policy, args.clip_param, args.ppo_epoch,
236 | args.num_mini_batch, args.value_loss_coef,
237 | args.entropy_coef, lr=args.lr, eps=args.eps,
238 | max_grad_norm=args.max_grad_norm)
239 |
240 | global_input = torch.zeros(num_scenes, ngc, local_w, local_h)
241 | global_orientation = torch.zeros(num_scenes, 1).long()
242 | intrinsic_rews = torch.zeros(num_scenes).to(device)
243 | extras = torch.zeros(num_scenes, 2)
244 |
245 | # Storage
246 | g_rollouts = GlobalRolloutStorage(args.num_global_steps,
247 | num_scenes, g_observation_space.shape,
248 | g_action_space, g_policy.rec_state_size,
249 | es).to(device)
250 |
251 | if args.load != "0":
252 | print("Loading model {}".format(args.load))
253 | state_dict = torch.load(args.load,
254 | map_location=lambda storage, loc: storage)
255 | g_policy.load_state_dict(state_dict)
256 |
257 | if args.eval:
258 | g_policy.eval()
259 |
260 | # Predict semantic map from frame 1
261 | poses = torch.from_numpy(np.asarray(
262 | [infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes)])
263 | ).float().to(device)
264 |
265 | _, local_map, _, local_pose = \
266 | sem_map_module(obs, poses, local_map, local_pose)
267 |
268 | # Compute Global policy input
269 | locs = local_pose.cpu().numpy()
270 | global_input = torch.zeros(num_scenes, ngc, local_w, local_h)
271 | global_orientation = torch.zeros(num_scenes, 1).long()
272 |
273 | for e in range(num_scenes):
274 | r, c = locs[e, 1], locs[e, 0]
275 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
276 | int(c * 100.0 / args.map_resolution)]
277 |
278 | local_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.
279 | global_orientation[e] = int((locs[e, 2] + 180.0) / 5.)
280 |
281 | global_input[:, 0:4, :, :] = local_map[:, 0:4, :, :].detach()
282 | global_input[:, 4:8, :, :] = nn.MaxPool2d(args.global_downscaling)(
283 | full_map[:, 0:4, :, :])
284 | global_input[:, 8:, :, :] = local_map[:, 4:, :, :].detach()
285 | goal_cat_id = torch.from_numpy(np.asarray(
286 | [infos[env_idx]['goal_cat_id'] for env_idx
287 | in range(num_scenes)]))
288 |
289 | extras = torch.zeros(num_scenes, 2)
290 | extras[:, 0] = global_orientation[:, 0]
291 | extras[:, 1] = goal_cat_id
292 |
293 | g_rollouts.obs[0].copy_(global_input)
294 | g_rollouts.extras[0].copy_(extras)
295 |
296 | # Run Global Policy (global_goals = Long-Term Goal)
297 | g_value, g_action, g_action_log_prob, g_rec_states = \
298 | g_policy.act(
299 | g_rollouts.obs[0],
300 | g_rollouts.rec_states[0],
301 | g_rollouts.masks[0],
302 | extras=g_rollouts.extras[0],
303 | deterministic=False
304 | )
305 |
306 | cpu_actions = nn.Sigmoid()(g_action).cpu().numpy()
307 | global_goals = [[int(action[0] * local_w), int(action[1] * local_h)]
308 | for action in cpu_actions]
309 | global_goals = [[min(x, int(local_w - 1)), min(y, int(local_h - 1))]
310 | for x, y in global_goals]
311 |
312 | goal_maps = [np.zeros((local_w, local_h)) for _ in range(num_scenes)]
313 |
314 | for e in range(num_scenes):
315 | goal_maps[e][global_goals[e][0], global_goals[e][1]] = 1
316 |
317 | planner_inputs = [{} for e in range(num_scenes)]
318 | for e, p_input in enumerate(planner_inputs):
319 | p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy()
320 | p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy()
321 | p_input['pose_pred'] = planner_pose_inputs[e]
322 | p_input['goal'] = goal_maps[e] # global_goals[e]
323 | p_input['new_goal'] = 1
324 | p_input['found_goal'] = 0
325 | p_input['wait'] = wait_env[e] or finished[e]
326 | if args.visualize or args.print_images:
327 | local_map[e, -1, :, :] = 1e-5
328 | p_input['sem_map_pred'] = local_map[e, 4:, :, :
329 | ].argmax(0).cpu().numpy()
330 |
331 | obs, _, done, infos = envs.plan_act_and_preprocess(planner_inputs)
332 |
333 | start = time.time()
334 | g_reward = 0
335 |
336 | torch.set_grad_enabled(False)
337 | spl_per_category = defaultdict(list)
338 | success_per_category = defaultdict(list)
339 |
340 | for step in range(args.num_training_frames // args.num_processes + 1):
341 | if finished.sum() == args.num_processes:
342 | break
343 |
344 | g_step = (step // args.num_local_steps) % args.num_global_steps
345 | l_step = step % args.num_local_steps
346 |
347 | # ------------------------------------------------------------------
348 | # Reinitialize variables when episode ends
349 | l_masks = torch.FloatTensor([0 if x else 1
350 | for x in done]).to(device)
351 | g_masks *= l_masks
352 |
353 | for e, x in enumerate(done):
354 | if x:
355 | spl = infos[e]['spl']
356 | success = infos[e]['success']
357 | dist = infos[e]['distance_to_goal']
358 | spl_per_category[infos[e]['goal_name']].append(spl)
359 | success_per_category[infos[e]['goal_name']].append(success)
360 | if args.eval:
361 | episode_success[e].append(success)
362 | episode_spl[e].append(spl)
363 | episode_dist[e].append(dist)
364 | if len(episode_success[e]) == num_episodes:
365 | finished[e] = 1
366 | else:
367 | episode_success.append(success)
368 | episode_spl.append(spl)
369 | episode_dist.append(dist)
370 | wait_env[e] = 1.
371 | update_intrinsic_rew(e)
372 | init_map_and_pose_for_env(e)
373 | # ------------------------------------------------------------------
374 |
375 | # ------------------------------------------------------------------
376 | # Semantic Mapping Module
377 | poses = torch.from_numpy(np.asarray(
378 | [infos[env_idx]['sensor_pose'] for env_idx
379 | in range(num_scenes)])
380 | ).float().to(device)
381 |
382 | _, local_map, _, local_pose = \
383 | sem_map_module(obs, poses, local_map, local_pose)
384 |
385 | locs = local_pose.cpu().numpy()
386 | planner_pose_inputs[:, :3] = locs + origins
387 | local_map[:, 2, :, :].fill_(0.) # Resetting current location channel
388 | for e in range(num_scenes):
389 | r, c = locs[e, 1], locs[e, 0]
390 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
391 | int(c * 100.0 / args.map_resolution)]
392 | local_map[e, 2:4, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1.
393 |
394 | # ------------------------------------------------------------------
395 |
396 | # ------------------------------------------------------------------
397 | # Global Policy
398 | if l_step == args.num_local_steps - 1:
399 | # For every global step, update the full and local maps
400 | for e in range(num_scenes):
401 | if wait_env[e] == 1: # New episode
402 | wait_env[e] = 0.
403 | else:
404 | update_intrinsic_rew(e)
405 |
406 | full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \
407 | local_map[e]
408 | full_pose[e] = local_pose[e] + \
409 | torch.from_numpy(origins[e]).to(device).float()
410 |
411 | locs = full_pose[e].cpu().numpy()
412 | r, c = locs[1], locs[0]
413 | loc_r, loc_c = [int(r * 100.0 / args.map_resolution),
414 | int(c * 100.0 / args.map_resolution)]
415 |
416 | lmb[e] = get_local_map_boundaries((loc_r, loc_c),
417 | (local_w, local_h),
418 | (full_w, full_h))
419 |
420 | planner_pose_inputs[e, 3:] = lmb[e]
421 | origins[e] = [lmb[e][2] * args.map_resolution / 100.0,
422 | lmb[e][0] * args.map_resolution / 100.0, 0.]
423 |
424 | local_map[e] = full_map[e, :,
425 | lmb[e, 0]:lmb[e, 1],
426 | lmb[e, 2]:lmb[e, 3]]
427 | local_pose[e] = full_pose[e] - \
428 | torch.from_numpy(origins[e]).to(device).float()
429 |
430 | locs = local_pose.cpu().numpy()
431 | for e in range(num_scenes):
432 | global_orientation[e] = int((locs[e, 2] + 180.0) / 5.)
433 | global_input[:, 0:4, :, :] = local_map[:, 0:4, :, :]
434 | global_input[:, 4:8, :, :] = \
435 | nn.MaxPool2d(args.global_downscaling)(
436 | full_map[:, 0:4, :, :])
437 | global_input[:, 8:, :, :] = local_map[:, 4:, :, :].detach()
438 | goal_cat_id = torch.from_numpy(np.asarray(
439 | [infos[env_idx]['goal_cat_id'] for env_idx
440 | in range(num_scenes)]))
441 | extras[:, 0] = global_orientation[:, 0]
442 | extras[:, 1] = goal_cat_id
443 |
444 | # Get exploration reward and metrics
445 | g_reward = torch.from_numpy(np.asarray(
446 | [infos[env_idx]['g_reward'] for env_idx in range(num_scenes)])
447 | ).float().to(device)
448 | g_reward += args.intrinsic_rew_coeff * intrinsic_rews.detach()
449 |
450 | g_process_rewards += g_reward.cpu().numpy()
451 | g_total_rewards = g_process_rewards * \
452 | (1 - g_masks.cpu().numpy())
453 | g_process_rewards *= g_masks.cpu().numpy()
454 | per_step_g_rewards.append(np.mean(g_reward.cpu().numpy()))
455 |
456 | if np.sum(g_total_rewards) != 0:
457 | for total_rew in g_total_rewards:
458 | if total_rew != 0:
459 | g_episode_rewards.append(total_rew)
460 |
461 | # Add samples to global policy storage
462 | if step == 0:
463 | g_rollouts.obs[0].copy_(global_input)
464 | g_rollouts.extras[0].copy_(extras)
465 | else:
466 | g_rollouts.insert(
467 | global_input, g_rec_states,
468 | g_action, g_action_log_prob, g_value,
469 | g_reward, g_masks, extras
470 | )
471 |
472 | # Sample long-term goal from global policy
473 | g_value, g_action, g_action_log_prob, g_rec_states = \
474 | g_policy.act(
475 | g_rollouts.obs[g_step + 1],
476 | g_rollouts.rec_states[g_step + 1],
477 | g_rollouts.masks[g_step + 1],
478 | extras=g_rollouts.extras[g_step + 1],
479 | deterministic=False
480 | )
481 | cpu_actions = nn.Sigmoid()(g_action).cpu().numpy()
482 | global_goals = [[int(action[0] * local_w),
483 | int(action[1] * local_h)]
484 | for action in cpu_actions]
485 | global_goals = [[min(x, int(local_w - 1)),
486 | min(y, int(local_h - 1))]
487 | for x, y in global_goals]
488 |
489 | g_reward = 0
490 | g_masks = torch.ones(num_scenes).float().to(device)
491 |
492 | # ------------------------------------------------------------------
493 |
494 | # ------------------------------------------------------------------
495 | # Update long-term goal if target object is found
496 | found_goal = [0 for _ in range(num_scenes)]
497 | goal_maps = [np.zeros((local_w, local_h)) for _ in range(num_scenes)]
498 |
499 | for e in range(num_scenes):
500 | goal_maps[e][global_goals[e][0], global_goals[e][1]] = 1
501 |
502 | for e in range(num_scenes):
503 | cn = infos[e]['goal_cat_id'] + 4
504 | if local_map[e, cn, :, :].sum() != 0.:
505 | cat_semantic_map = local_map[e, cn, :, :].cpu().numpy()
506 | cat_semantic_scores = cat_semantic_map
507 | cat_semantic_scores[cat_semantic_scores > 0] = 1.
508 | goal_maps[e] = cat_semantic_scores
509 | found_goal[e] = 1
510 | # ------------------------------------------------------------------
511 |
512 | # ------------------------------------------------------------------
513 | # Take action and get next observation
514 | planner_inputs = [{} for e in range(num_scenes)]
515 | for e, p_input in enumerate(planner_inputs):
516 | p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy()
517 | p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy()
518 | p_input['pose_pred'] = planner_pose_inputs[e]
519 | p_input['goal'] = goal_maps[e] # global_goals[e]
520 | p_input['new_goal'] = l_step == args.num_local_steps - 1
521 | p_input['found_goal'] = found_goal[e]
522 | p_input['wait'] = wait_env[e] or finished[e]
523 | if args.visualize or args.print_images:
524 | local_map[e, -1, :, :] = 1e-5
525 | p_input['sem_map_pred'] = local_map[e, 4:, :,
526 | :].argmax(0).cpu().numpy()
527 |
528 | obs, _, done, infos = envs.plan_act_and_preprocess(planner_inputs)
529 | # ------------------------------------------------------------------
530 |
531 | # ------------------------------------------------------------------
532 | # Training
533 | torch.set_grad_enabled(True)
534 | if g_step % args.num_global_steps == args.num_global_steps - 1 \
535 | and l_step == args.num_local_steps - 1:
536 | if not args.eval:
537 | g_next_value = g_policy.get_value(
538 | g_rollouts.obs[-1],
539 | g_rollouts.rec_states[-1],
540 | g_rollouts.masks[-1],
541 | extras=g_rollouts.extras[-1]
542 | ).detach()
543 |
544 | g_rollouts.compute_returns(g_next_value, args.use_gae,
545 | args.gamma, args.tau)
546 | g_value_loss, g_action_loss, g_dist_entropy = \
547 | g_agent.update(g_rollouts)
548 | g_value_losses.append(g_value_loss)
549 | g_action_losses.append(g_action_loss)
550 | g_dist_entropies.append(g_dist_entropy)
551 | g_rollouts.after_update()
552 |
553 | torch.set_grad_enabled(False)
554 | # ------------------------------------------------------------------
555 |
556 | # ------------------------------------------------------------------
557 | # Logging
558 | if step % args.log_interval == 0:
559 | end = time.time()
560 | time_elapsed = time.gmtime(end - start)
561 | log = " ".join([
562 | "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1),
563 | "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)),
564 | "num timesteps {},".format(step * num_scenes),
565 | "FPS {},".format(int(step * num_scenes / (end - start)))
566 | ])
567 |
568 | log += "\n\tRewards:"
569 |
570 | if len(g_episode_rewards) > 0:
571 | log += " ".join([
572 | " Global step mean/med rew:",
573 | "{:.4f}/{:.4f},".format(
574 | np.mean(per_step_g_rewards),
575 | np.median(per_step_g_rewards)),
576 | " Global eps mean/med/min/max eps rew:",
577 | "{:.3f}/{:.3f}/{:.3f}/{:.3f},".format(
578 | np.mean(g_episode_rewards),
579 | np.median(g_episode_rewards),
580 | np.min(g_episode_rewards),
581 | np.max(g_episode_rewards))
582 | ])
583 |
584 | if args.eval:
585 | total_success = []
586 | total_spl = []
587 | total_dist = []
588 | for e in range(args.num_processes):
589 | for acc in episode_success[e]:
590 | total_success.append(acc)
591 | for dist in episode_dist[e]:
592 | total_dist.append(dist)
593 | for spl in episode_spl[e]:
594 | total_spl.append(spl)
595 |
596 | if len(total_spl) > 0:
597 | log += " ObjectNav succ/spl/dtg:"
598 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format(
599 | np.mean(total_success),
600 | np.mean(total_spl),
601 | np.mean(total_dist),
602 | len(total_spl))
603 | else:
604 | if len(episode_success) > 100:
605 | log += " ObjectNav succ/spl/dtg:"
606 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format(
607 | np.mean(episode_success),
608 | np.mean(episode_spl),
609 | np.mean(episode_dist),
610 | len(episode_spl))
611 |
612 | log += "\n\tLosses:"
613 | if len(g_value_losses) > 0 and not args.eval:
614 | log += " ".join([
615 | " Policy Loss value/action/dist:",
616 | "{:.3f}/{:.3f}/{:.3f},".format(
617 | np.mean(g_value_losses),
618 | np.mean(g_action_losses),
619 | np.mean(g_dist_entropies))
620 | ])
621 |
622 | print(log)
623 | logging.info(log)
624 | # ------------------------------------------------------------------
625 |
626 | # ------------------------------------------------------------------
627 | # Save best models
628 | if (step * num_scenes) % args.save_interval < \
629 | num_scenes:
630 | if len(g_episode_rewards) >= 1000 and \
631 | (np.mean(g_episode_rewards) >= best_g_reward) \
632 | and not args.eval:
633 | torch.save(g_policy.state_dict(),
634 | os.path.join(log_dir, "model_best.pth"))
635 | best_g_reward = np.mean(g_episode_rewards)
636 |
637 | # Save periodic models
638 | if (step * num_scenes) % args.save_periodic < \
639 | num_scenes:
640 | total_steps = step * num_scenes
641 | if not args.eval:
642 | torch.save(g_policy.state_dict(),
643 | os.path.join(dump_dir,
644 | "periodic_{}.pth".format(total_steps)))
645 | # ------------------------------------------------------------------
646 |
647 | # Print and save model performance numbers during evaluation
648 | if args.eval:
649 | print("Dumping eval details...")
650 |
651 | total_success = []
652 | total_spl = []
653 | total_dist = []
654 | for e in range(args.num_processes):
655 | for acc in episode_success[e]:
656 | total_success.append(acc)
657 | for dist in episode_dist[e]:
658 | total_dist.append(dist)
659 | for spl in episode_spl[e]:
660 | total_spl.append(spl)
661 |
662 | if len(total_spl) > 0:
663 | log = "Final ObjectNav succ/spl/dtg:"
664 | log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format(
665 | np.mean(total_success),
666 | np.mean(total_spl),
667 | np.mean(total_dist),
668 | len(total_spl))
669 |
670 | print(log)
671 | logging.info(log)
672 |
673 | # Save the spl per category
674 | log = "Success | SPL per category\n"
675 | for key in success_per_category:
676 | log += "{}: {} | {}\n".format(key,
677 | sum(success_per_category[key]) /
678 | len(success_per_category[key]),
679 | sum(spl_per_category[key]) /
680 | len(spl_per_category[key]))
681 |
682 | print(log)
683 | logging.info(log)
684 |
685 | with open('{}/{}_spl_per_cat_pred_thr.json'.format(
686 | dump_dir, args.split), 'w') as f:
687 | json.dump(spl_per_category, f)
688 |
689 | with open('{}/{}_success_per_cat_pred_thr.json'.format(
690 | dump_dir, args.split), 'w') as f:
691 | json.dump(success_per_category, f)
692 |
693 |
694 | if __name__ == "__main__":
695 | main()
696 |
--------------------------------------------------------------------------------
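The windowing logic in `get_local_map_boundaries` is the piece of main.py that is easiest to get wrong when porting. A standalone check of the same clamping arithmetic, reduced to one axis with toy sizes:

    def local_bounds(loc, local, full):
        # Center a `local`-sized window on `loc`, clamped inside [0, full].
        g1 = loc - local // 2
        g2 = g1 + local
        if g1 < 0:
            g1, g2 = 0, local
        if g2 > full:
            g1, g2 = full - local, full
        return g1, g2

    print(local_bounds(10, 240, 480))   # (0, 240): clamped at the low edge
    print(local_bounds(470, 240, 480))  # (240, 480): clamped at the high edge
    print(local_bounds(240, 240, 480))  # (120, 360): centered window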
/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import functional as F
4 | import numpy as np
5 |
6 | from utils.distributions import Categorical, DiagGaussian
7 | from utils.model import get_grid, ChannelPool, Flatten, NNBase
8 | import envs.utils.depth_utils as du
9 |
10 |
11 | class Goal_Oriented_Semantic_Policy(NNBase):
12 |
13 | def __init__(self, input_shape, recurrent=False, hidden_size=512,
14 | num_sem_categories=16):
15 | super(Goal_Oriented_Semantic_Policy, self).__init__(
16 | recurrent, hidden_size, hidden_size)
17 |
18 | out_size = int(input_shape[1] / 16.) * int(input_shape[2] / 16.)
19 |
20 | self.main = nn.Sequential(
21 | nn.MaxPool2d(2),
22 | nn.Conv2d(num_sem_categories + 8, 32, 3, stride=1, padding=1),
23 | nn.ReLU(),
24 | nn.MaxPool2d(2),
25 | nn.Conv2d(32, 64, 3, stride=1, padding=1),
26 | nn.ReLU(),
27 | nn.MaxPool2d(2),
28 | nn.Conv2d(64, 128, 3, stride=1, padding=1),
29 | nn.ReLU(),
30 | nn.MaxPool2d(2),
31 | nn.Conv2d(128, 64, 3, stride=1, padding=1),
32 | nn.ReLU(),
33 | nn.Conv2d(64, 32, 3, stride=1, padding=1),
34 | nn.ReLU(),
35 | Flatten()
36 | )
37 |
38 | self.linear1 = nn.Linear(out_size * 32 + 8 * 2, hidden_size)
39 | self.linear2 = nn.Linear(hidden_size, 256)
40 | self.critic_linear = nn.Linear(256, 1)
41 | self.orientation_emb = nn.Embedding(72, 8)
42 | self.goal_emb = nn.Embedding(num_sem_categories, 8)
43 | self.train()
44 |
45 | def forward(self, inputs, rnn_hxs, masks, extras):
46 | x = self.main(inputs)
47 | orientation_emb = self.orientation_emb(extras[:, 0])
48 | goal_emb = self.goal_emb(extras[:, 1])
49 |
50 | x = torch.cat((x, orientation_emb, goal_emb), 1)
51 |
52 | x = nn.ReLU()(self.linear1(x))
53 | if self.is_recurrent:
54 | x, rnn_hxs = self._forward_gru(x, rnn_hxs, masks)
55 |
56 | x = nn.ReLU()(self.linear2(x))
57 |
58 | return self.critic_linear(x).squeeze(-1), x, rnn_hxs
59 |
60 |
61 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L15
62 | class RL_Policy(nn.Module):
63 |
64 | def __init__(self, obs_shape, action_space, model_type=0,
65 | base_kwargs=None):
66 |
67 | super(RL_Policy, self).__init__()
68 | if base_kwargs is None:
69 | base_kwargs = {}
70 |
71 | if model_type == 1:
72 | self.network = Goal_Oriented_Semantic_Policy(
73 | obs_shape, **base_kwargs)
74 | else:
75 | raise NotImplementedError
76 |
77 | if action_space.__class__.__name__ == "Discrete":
78 | num_outputs = action_space.n
79 | self.dist = Categorical(self.network.output_size, num_outputs)
80 | elif action_space.__class__.__name__ == "Box":
81 | num_outputs = action_space.shape[0]
82 | self.dist = DiagGaussian(self.network.output_size, num_outputs)
83 | else:
84 | raise NotImplementedError
85 |
86 | self.model_type = model_type
87 |
88 | @property
89 | def is_recurrent(self):
90 | return self.network.is_recurrent
91 |
92 | @property
93 | def rec_state_size(self):
94 | """Size of rnn_hx."""
95 | return self.network.rec_state_size
96 |
97 | def forward(self, inputs, rnn_hxs, masks, extras):
98 | if extras is None:
99 | return self.network(inputs, rnn_hxs, masks)
100 | else:
101 | return self.network(inputs, rnn_hxs, masks, extras)
102 |
103 | def act(self, inputs, rnn_hxs, masks, extras=None, deterministic=False):
104 |
105 | value, actor_features, rnn_hxs = self(inputs, rnn_hxs, masks, extras)
106 | dist = self.dist(actor_features)
107 |
108 | if deterministic:
109 | action = dist.mode()
110 | else:
111 | action = dist.sample()
112 |
113 | action_log_probs = dist.log_probs(action)
114 |
115 | return value, action, action_log_probs, rnn_hxs
116 |
117 | def get_value(self, inputs, rnn_hxs, masks, extras=None):
118 | value, _, _ = self(inputs, rnn_hxs, masks, extras)
119 | return value
120 |
121 | def evaluate_actions(self, inputs, rnn_hxs, masks, action, extras=None):
122 |
123 | value, actor_features, rnn_hxs = self(inputs, rnn_hxs, masks, extras)
124 | dist = self.dist(actor_features)
125 |
126 | action_log_probs = dist.log_probs(action)
127 | dist_entropy = dist.entropy().mean()
128 |
129 | return value, action_log_probs, dist_entropy, rnn_hxs
130 |
131 |
132 | class Semantic_Mapping(nn.Module):
133 |
134 |     """
135 |     Builds a top-down semantic map from depth, segmentation and pose.
136 |     """
137 |
138 | def __init__(self, args):
139 | super(Semantic_Mapping, self).__init__()
140 |
141 | self.device = args.device
142 | self.screen_h = args.frame_height
143 | self.screen_w = args.frame_width
144 | self.resolution = args.map_resolution
145 | self.z_resolution = args.map_resolution
146 | self.map_size_cm = args.map_size_cm // args.global_downscaling
147 | self.n_channels = 3
148 | self.vision_range = args.vision_range
149 | self.dropout = 0.5
150 | self.fov = args.hfov
151 | self.du_scale = args.du_scale
152 | self.cat_pred_threshold = args.cat_pred_threshold
153 | self.exp_pred_threshold = args.exp_pred_threshold
154 | self.map_pred_threshold = args.map_pred_threshold
155 | self.num_sem_categories = args.num_sem_categories
156 |
157 | self.max_height = int(360 / self.z_resolution)
158 | self.min_height = int(-40 / self.z_resolution)
159 | self.agent_height = args.camera_height * 100.
160 | self.shift_loc = [self.vision_range *
161 | self.resolution // 2, 0, np.pi / 2.0]
162 | self.camera_matrix = du.get_camera_matrix(
163 | self.screen_w, self.screen_h, self.fov)
164 |
165 | self.pool = ChannelPool(1)
166 |
167 | vr = self.vision_range
168 |
169 | self.init_grid = torch.zeros(
170 | args.num_processes, 1 + self.num_sem_categories, vr, vr,
171 | self.max_height - self.min_height
172 | ).float().to(self.device)
173 | self.feat = torch.ones(
174 | args.num_processes, 1 + self.num_sem_categories,
175 | self.screen_h // self.du_scale * self.screen_w // self.du_scale
176 | ).float().to(self.device)
177 |
178 | def forward(self, obs, pose_obs, maps_last, poses_last):
179 | bs, c, h, w = obs.size()
180 | depth = obs[:, 3, :, :]
181 |
182 | point_cloud_t = du.get_point_cloud_from_z_t(
183 | depth, self.camera_matrix, self.device, scale=self.du_scale)
184 |
185 | agent_view_t = du.transform_camera_view_t(
186 | point_cloud_t, self.agent_height, 0, self.device)
187 |
188 | agent_view_centered_t = du.transform_pose_t(
189 | agent_view_t, self.shift_loc, self.device)
190 |
191 | max_h = self.max_height
192 | min_h = self.min_height
193 | xy_resolution = self.resolution
194 | z_resolution = self.z_resolution
195 | vision_range = self.vision_range
196 | XYZ_cm_std = agent_view_centered_t.float()
197 | XYZ_cm_std[..., :2] = (XYZ_cm_std[..., :2] / xy_resolution)
198 | XYZ_cm_std[..., :2] = (XYZ_cm_std[..., :2] -
199 | vision_range // 2.) / vision_range * 2.
200 | XYZ_cm_std[..., 2] = XYZ_cm_std[..., 2] / z_resolution
201 | XYZ_cm_std[..., 2] = (XYZ_cm_std[..., 2] -
202 | (max_h + min_h) // 2.) / (max_h - min_h) * 2.
203 | self.feat[:, 1:, :] = nn.AvgPool2d(self.du_scale)(
204 | obs[:, 4:, :, :]
205 | ).view(bs, c - 4, h // self.du_scale * w // self.du_scale)
206 |
207 | XYZ_cm_std = XYZ_cm_std.permute(0, 3, 1, 2)
208 | XYZ_cm_std = XYZ_cm_std.view(XYZ_cm_std.shape[0],
209 | XYZ_cm_std.shape[1],
210 | XYZ_cm_std.shape[2] * XYZ_cm_std.shape[3])
211 |
212 | voxels = du.splat_feat_nd(
213 | self.init_grid * 0., self.feat, XYZ_cm_std).transpose(2, 3)
214 |
215 | min_z = int(25 / z_resolution - min_h)
216 | max_z = int((self.agent_height + 1) / z_resolution - min_h)
217 |
218 | agent_height_proj = voxels[..., min_z:max_z].sum(4)
219 | all_height_proj = voxels.sum(4)
220 |
221 | fp_map_pred = agent_height_proj[:, 0:1, :, :]
222 | fp_exp_pred = all_height_proj[:, 0:1, :, :]
223 | fp_map_pred = fp_map_pred / self.map_pred_threshold
224 | fp_exp_pred = fp_exp_pred / self.exp_pred_threshold
225 | fp_map_pred = torch.clamp(fp_map_pred, min=0.0, max=1.0)
226 | fp_exp_pred = torch.clamp(fp_exp_pred, min=0.0, max=1.0)
227 |
228 | pose_pred = poses_last
229 |
230 | agent_view = torch.zeros(bs, c,
231 | self.map_size_cm // self.resolution,
232 | self.map_size_cm // self.resolution
233 | ).to(self.device)
234 |
235 | x1 = self.map_size_cm // (self.resolution * 2) - self.vision_range // 2
236 | x2 = x1 + self.vision_range
237 | y1 = self.map_size_cm // (self.resolution * 2)
238 | y2 = y1 + self.vision_range
239 | agent_view[:, 0:1, y1:y2, x1:x2] = fp_map_pred
240 | agent_view[:, 1:2, y1:y2, x1:x2] = fp_exp_pred
241 | agent_view[:, 4:, y1:y2, x1:x2] = torch.clamp(
242 | agent_height_proj[:, 1:, :, :] / self.cat_pred_threshold,
243 | min=0.0, max=1.0)
244 |
245 | corrected_pose = pose_obs
246 |
247 | def get_new_pose_batch(pose, rel_pose_change):
248 |
249 | pose[:, 1] += rel_pose_change[:, 0] * \
250 | torch.sin(pose[:, 2] / 57.29577951308232) \
251 | + rel_pose_change[:, 1] * \
252 | torch.cos(pose[:, 2] / 57.29577951308232)
253 | pose[:, 0] += rel_pose_change[:, 0] * \
254 | torch.cos(pose[:, 2] / 57.29577951308232) \
255 | - rel_pose_change[:, 1] * \
256 | torch.sin(pose[:, 2] / 57.29577951308232)
257 | pose[:, 2] += rel_pose_change[:, 2] * 57.29577951308232
258 |
259 | pose[:, 2] = torch.fmod(pose[:, 2] - 180.0, 360.0) + 180.0
260 | pose[:, 2] = torch.fmod(pose[:, 2] + 180.0, 360.0) - 180.0
261 |
262 | return pose
263 |
264 | current_poses = get_new_pose_batch(poses_last, corrected_pose)
265 | st_pose = current_poses.clone().detach()
266 |
267 | st_pose[:, :2] = - (st_pose[:, :2]
268 | * 100.0 / self.resolution
269 | - self.map_size_cm // (self.resolution * 2)) /\
270 | (self.map_size_cm // (self.resolution * 2))
271 | st_pose[:, 2] = 90. - (st_pose[:, 2])
272 |
273 | rot_mat, trans_mat = get_grid(st_pose, agent_view.size(),
274 | self.device)
275 |
276 | rotated = F.grid_sample(agent_view, rot_mat, align_corners=True)
277 | translated = F.grid_sample(rotated, trans_mat, align_corners=True)
278 |
279 | maps2 = torch.cat((maps_last.unsqueeze(1), translated.unsqueeze(1)), 1)
280 |
281 | map_pred, _ = torch.max(maps2, 1)
282 |
283 | return fp_map_pred, map_pred, pose_pred, current_poses
284 |
--------------------------------------------------------------------------------
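A minimal smoke test for `RL_Policy`, assuming the repo root is on PYTHONPATH. `hidden_size` is kept at 256 here because `linear2` projects features to 256 while the action head is sized from `network.output_size` (which equals `hidden_size`), so other values would mismatch:

    import gym
    import numpy as np
    import torch

    from model import RL_Policy

    # ngc = 8 + num_sem_categories map channels on a 240x240 local map, and
    # a 2-D Box action (normalized long-term goal), mirroring main.py.
    num_sem_categories = 16
    obs_shape = (8 + num_sem_categories, 240, 240)
    action_space = gym.spaces.Box(low=0.0, high=0.99, shape=(2,),
                                  dtype=np.float32)

    policy = RL_Policy(obs_shape, action_space, model_type=1,
                       base_kwargs={'recurrent': False, 'hidden_size': 256,
                                    'num_sem_categories': num_sem_categories})

    obs = torch.zeros(2, *obs_shape)
    rec = torch.zeros(2, policy.rec_state_size)
    masks = torch.ones(2, 1)
    extras = torch.zeros(2, 2).long()  # orientation bin (<72), goal id (<16)

    value, action, log_prob, rec = policy.act(obs, rec, masks, extras=extras)
    print(action.shape)  # torch.Size([2, 2])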
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-fmm==2019.1.30
2 | scikit-learn==0.22.2.post1
3 | scikit-image==0.15.0
4 | numpy>=1.20.2
5 | ifcfg
6 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import numpy as np
4 |
5 | from envs import make_vec_envs
6 | from arguments import get_args
7 |
8 | os.environ["OMP_NUM_THREADS"] = "1"
9 |
10 | args = get_args()
11 |
12 | np.random.seed(args.seed)
13 | torch.manual_seed(args.seed)
14 |
15 | if args.cuda:
16 | torch.cuda.manual_seed(args.seed)
17 |
18 |
19 | def main():
20 | num_episodes = int(args.num_eval_episodes)
21 | args.device = torch.device("cuda:0" if args.cuda else "cpu")
22 |
23 | torch.set_num_threads(1)
24 | envs = make_vec_envs(args)
25 | obs, infos = envs.reset()
26 |
27 | for ep_num in range(num_episodes):
28 | for step in range(args.max_episode_length):
29 | action = torch.randint(0, 3, (args.num_processes,))
30 | obs, rew, done, infos = envs.step(action)
31 |
32 |             if np.all(done):  # `done` is per-process in a vectorized env
33 |                 break
34 |
35 | print("Test successfully completed")
36 |
37 |
38 | if __name__ == "__main__":
39 | main()
40 |
--------------------------------------------------------------------------------
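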
/utils/distributions.py:
--------------------------------------------------------------------------------
1 | # The following code is largely borrowed from:
2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/distributions.py
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | from utils.model import AddBias
8 |
9 | """
10 | Modify standard PyTorch distributions so they are compatible with this code.
11 | """
12 |
13 | FixedCategorical = torch.distributions.Categorical
14 |
15 | old_sample = FixedCategorical.sample
16 | FixedCategorical.sample = lambda self: old_sample(self)
17 |
18 | log_prob_cat = FixedCategorical.log_prob
19 | FixedCategorical.log_probs = lambda self, actions: \
20 | log_prob_cat(self, actions.squeeze(-1))
21 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True)
22 |
23 | FixedNormal = torch.distributions.Normal
24 | log_prob_normal = FixedNormal.log_prob
25 | FixedNormal.log_probs = lambda self, actions: \
26 | log_prob_normal(self, actions).sum(-1, keepdim=False)
27 |
28 | entropy = FixedNormal.entropy
29 | FixedNormal.entropy = lambda self: entropy(self).sum(-1)
30 |
31 | FixedNormal.mode = lambda self: self.mean
32 |
33 |
34 | class Categorical(nn.Module):
35 |
36 | def __init__(self, num_inputs, num_outputs):
37 | super(Categorical, self).__init__()
38 | self.linear = nn.Linear(num_inputs, num_outputs)
39 |
40 | def forward(self, x):
41 | x = self.linear(x)
42 | return FixedCategorical(logits=x)
43 |
44 |
45 | class DiagGaussian(nn.Module):
46 |
47 | def __init__(self, num_inputs, num_outputs):
48 | super(DiagGaussian, self).__init__()
49 |
50 | self.fc_mean = nn.Linear(num_inputs, num_outputs)
51 | self.logstd = AddBias(torch.zeros(num_outputs))
52 |
53 | def forward(self, x):
54 | action_mean = self.fc_mean(x)
55 |
56 | zeros = torch.zeros(action_mean.size())
57 | if x.is_cuda:
58 | zeros = zeros.cuda()
59 |
60 | action_logstd = self.logstd(zeros)
61 | return FixedNormal(action_mean, action_logstd.exp())
62 |
--------------------------------------------------------------------------------
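A quick sketch of the two action heads and the patched distribution API above:

    import torch

    from utils.distributions import Categorical, DiagGaussian

    cat_head = Categorical(num_inputs=32, num_outputs=4)
    dist = cat_head(torch.randn(5, 32))
    a = dist.sample()                  # (5,) action indices
    print(dist.log_probs(a).shape)     # torch.Size([5])
    print(dist.mode().shape)           # torch.Size([5, 1])

    gauss_head = DiagGaussian(num_inputs=32, num_outputs=2)
    dist = gauss_head(torch.randn(5, 32))
    a = dist.sample()                  # (5, 2) continuous actions
    print(dist.log_probs(a).shape)     # torch.Size([5])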
/utils/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | from torch.nn import functional as F
5 |
6 |
7 | def get_grid(pose, grid_size, device):
8 | """
9 | Input:
10 | `pose` FloatTensor(bs, 3)
11 | `grid_size` 4-tuple (bs, _, grid_h, grid_w)
12 | `device` torch.device (cpu or gpu)
13 | Output:
14 | `rot_grid` FloatTensor(bs, grid_h, grid_w, 2)
15 | `trans_grid` FloatTensor(bs, grid_h, grid_w, 2)
16 |
17 | """
18 | pose = pose.float()
19 | x = pose[:, 0]
20 | y = pose[:, 1]
21 | t = pose[:, 2]
22 |
23 | bs = x.size(0)
24 | t = t * np.pi / 180.
25 | cos_t = t.cos()
26 | sin_t = t.sin()
27 |
28 | theta11 = torch.stack([cos_t, -sin_t,
29 | torch.zeros(cos_t.shape).float().to(device)], 1)
30 | theta12 = torch.stack([sin_t, cos_t,
31 | torch.zeros(cos_t.shape).float().to(device)], 1)
32 | theta1 = torch.stack([theta11, theta12], 1)
33 |
34 |     theta21 = torch.stack([torch.ones(x.shape).to(device),
35 |                            torch.zeros(x.shape).to(device), x], 1)
36 | theta22 = torch.stack([torch.zeros(x.shape).to(device),
37 | torch.ones(x.shape).to(device), y], 1)
38 | theta2 = torch.stack([theta21, theta22], 1)
39 |
40 | rot_grid = F.affine_grid(theta1, torch.Size(grid_size))
41 | trans_grid = F.affine_grid(theta2, torch.Size(grid_size))
42 |
43 | return rot_grid, trans_grid
44 |
45 |
46 | class ChannelPool(nn.MaxPool1d):  # max-pools across the channel dimension (C channels -> 1)
47 | def forward(self, x):
48 | n, c, w, h = x.size()
49 | x = x.view(n, c, w * h).permute(0, 2, 1)
50 | x = x.contiguous()
51 | pooled = F.max_pool1d(x, c, 1)
52 | _, _, c = pooled.size()
53 | pooled = pooled.permute(0, 2, 1)
54 | return pooled.view(n, c, w, h)
55 |
56 |
57 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/utils.py#L32
58 | class AddBias(nn.Module):
59 | def __init__(self, bias):
60 | super(AddBias, self).__init__()
61 | self._bias = nn.Parameter(bias.unsqueeze(1))
62 |
63 | def forward(self, x):
64 | if x.dim() == 2:
65 | bias = self._bias.t().view(1, -1)
66 | else:
67 | bias = self._bias.t().view(1, -1, 1, 1)
68 |
69 | return x + bias
70 |
71 |
72 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L10
73 | class Flatten(nn.Module):
74 | def forward(self, x):
75 | return x.view(x.size(0), -1)
76 |
77 |
78 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L82
79 | class NNBase(nn.Module):
80 |
81 | def __init__(self, recurrent, recurrent_input_size, hidden_size):
82 |
83 | super(NNBase, self).__init__()
84 | self._hidden_size = hidden_size
85 | self._recurrent = recurrent
86 |
87 | if recurrent:
88 | self.gru = nn.GRUCell(recurrent_input_size, hidden_size)
89 | nn.init.orthogonal_(self.gru.weight_ih.data)
90 | nn.init.orthogonal_(self.gru.weight_hh.data)
91 | self.gru.bias_ih.data.fill_(0)
92 | self.gru.bias_hh.data.fill_(0)
93 |
94 | @property
95 | def is_recurrent(self):
96 | return self._recurrent
97 |
98 | @property
99 | def rec_state_size(self):
100 | if self._recurrent:
101 | return self._hidden_size
102 | return 1
103 |
104 | @property
105 | def output_size(self):
106 | return self._hidden_size
107 |
108 | def _forward_gru(self, x, hxs, masks):
109 | if x.size(0) == hxs.size(0):
110 | x = hxs = self.gru(x, hxs * masks[:, None])
111 | else:
112 |             # x is a (T, N, -1) tensor that has been flattened to (T * N, -1)
113 | N = hxs.size(0)
114 | T = int(x.size(0) / N)
115 |
116 | # unflatten
117 | x = x.view(T, N, x.size(1))
118 |
119 | # Same deal with masks
120 | masks = masks.view(T, N, 1)
121 |
122 | outputs = []
123 | for i in range(T):
124 | hx = hxs = self.gru(x[i], hxs * masks[i])
125 | outputs.append(hx)
126 |
127 | # x is a (T, N, -1) tensor
128 | x = torch.stack(outputs, dim=0)
129 | # flatten
130 | x = x.view(T * N, -1)
131 |
132 | return x, hxs
133 |
--------------------------------------------------------------------------------
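get_grid is the spatial-transformer half of map registration: it returns two sampling grids that F.grid_sample can apply to rotate a map about its centre and then shift it. A minimal sketch with invented map sizes and poses; per the function body, the heading is in degrees, while x and y are translations in affine_grid's normalized [-1, 1] coordinates (depending on the PyTorch version, leaving align_corners unspecified may emit a warning):

import torch
import torch.nn.functional as F
from utils.model import get_grid

bs, c, h, w = 2, 1, 240, 240
maps = torch.rand(bs, c, h, w)
# pose columns: (x shift, y shift, heading in degrees); x, y in normalized coords
pose = torch.tensor([[0.10, -0.05, 30.0],
                     [0.00, 0.00, -15.0]])

rot_grid, trans_grid = get_grid(pose, (bs, c, h, w), torch.device('cpu'))
rotated = F.grid_sample(maps, rot_grid)          # rotate about the map centre
registered = F.grid_sample(rotated, trans_grid)  # then translate
print(registered.shape)                          # torch.Size([2, 1, 240, 240])

The two branches of NNBase._forward_gru serve the same GRU in two regimes: single-step acting, where x and hxs have matching batch dimensions, and training, where a (T, N, -1) rollout arrives flattened to (T * N, -1) and is unrolled step by step with masks zeroing the hidden state across episode boundaries.
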
/utils/optimization.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import re
3 |
4 | from torch import optim
5 |
6 |
7 | def get_optimizer(parameters, s):
8 | """
9 | Parse optimizer parameters.
10 | Input should be of the form:
11 | - "sgd,lr=0.01"
12 | - "adagrad,lr=0.1,lr_decay=0.05"
13 | """
14 | if "," in s:
15 | method = s[:s.find(',')]
16 | optim_params = {}
17 | for x in s[s.find(',') + 1:].split(','):
18 | split = x.split('=')
19 | assert len(split) == 2
20 | assert re.match(
21 | r"^[+-]?(\d+(\.\d*)?|\.\d+)$",
22 | split[1]) is not None
23 | optim_params[split[0]] = float(split[1])
24 | else:
25 | method = s
26 | optim_params = {}
27 |
28 | if method == 'adadelta':
29 | optim_fn = optim.Adadelta
30 | elif method == 'adagrad':
31 | optim_fn = optim.Adagrad
32 | elif method == 'adam':
33 | optim_fn = optim.Adam
34 | optim_params['betas'] = (optim_params.get('beta1', 0.5),
35 | optim_params.get('beta2', 0.999))
36 | optim_params.pop('beta1', None)
37 | optim_params.pop('beta2', None)
38 | elif method == 'adamax':
39 | optim_fn = optim.Adamax
40 | elif method == 'asgd':
41 | optim_fn = optim.ASGD
42 | elif method == 'rmsprop':
43 | optim_fn = optim.RMSprop
44 | elif method == 'rprop':
45 | optim_fn = optim.Rprop
46 | elif method == 'sgd':
47 | optim_fn = optim.SGD
48 | assert 'lr' in optim_params
49 | else:
50 | raise Exception('Unknown optimization method: "%s"' % method)
51 |
52 |     # sanity-check that the given parameters match the optimizer's signature
53 |     expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
54 | assert expected_args[:2] == ['self', 'params']
55 | if not all(k in expected_args[2:] for k in optim_params.keys()):
56 | raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
57 | str(expected_args[2:]), str(optim_params.keys())))
58 |
59 | return optim_fn(parameters, **optim_params)
60 |
--------------------------------------------------------------------------------
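Typical usage of the parser above. Note that the value regex accepts plain decimals only (scientific notation such as 1e-3 fails the assert), and that for adam the beta1/beta2 keys are folded into the betas tuple expected by torch.optim.Adam:

import torch.nn as nn
from utils.optimization import get_optimizer

model = nn.Linear(10, 2)
sgd = get_optimizer(model.parameters(), "sgd,lr=0.01")
adam = get_optimizer(model.parameters(), "adam,lr=0.001,beta1=0.9,beta2=0.999")
print(adam.defaults['betas'])   # (0.9, 0.999)
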
/utils/storage.py:
--------------------------------------------------------------------------------
1 | # The following code is largely borrowed from:
2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/storage.py
3 |
4 | from collections import namedtuple
5 |
6 | import numpy as np
7 | import torch
8 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
9 |
10 |
11 | def _flatten_helper(T, N, _tensor):
12 | return _tensor.view(T * N, *_tensor.size()[2:])
13 |
14 |
15 | class RolloutStorage(object):
16 |
17 | def __init__(self, num_steps, num_processes, obs_shape, action_space,
18 | rec_state_size):
19 |
20 | if action_space.__class__.__name__ == 'Discrete':
21 | self.n_actions = 1
22 | action_type = torch.long
23 | else:
24 | self.n_actions = action_space.shape[0]
25 | action_type = torch.float32
26 |
27 | self.obs = torch.zeros(num_steps + 1, num_processes, *obs_shape)
28 | self.rec_states = torch.zeros(num_steps + 1, num_processes,
29 | rec_state_size)
30 | self.rewards = torch.zeros(num_steps, num_processes)
31 | self.value_preds = torch.zeros(num_steps + 1, num_processes)
32 | self.returns = torch.zeros(num_steps + 1, num_processes)
33 | self.action_log_probs = torch.zeros(num_steps, num_processes)
34 | self.actions = torch.zeros((num_steps, num_processes, self.n_actions),
35 | dtype=action_type)
36 | self.masks = torch.ones(num_steps + 1, num_processes)
37 |
38 | self.num_steps = num_steps
39 | self.step = 0
40 | self.has_extras = False
41 | self.extras_size = None
42 |
43 | def to(self, device):
44 | self.obs = self.obs.to(device)
45 | self.rec_states = self.rec_states.to(device)
46 | self.rewards = self.rewards.to(device)
47 | self.value_preds = self.value_preds.to(device)
48 | self.returns = self.returns.to(device)
49 | self.action_log_probs = self.action_log_probs.to(device)
50 | self.actions = self.actions.to(device)
51 | self.masks = self.masks.to(device)
52 | if self.has_extras:
53 | self.extras = self.extras.to(device)
54 | return self
55 |
56 | def insert(self, obs, rec_states, actions, action_log_probs, value_preds,
57 | rewards, masks):
58 | self.obs[self.step + 1].copy_(obs)
59 | self.rec_states[self.step + 1].copy_(rec_states)
60 | self.actions[self.step].copy_(actions.view(-1, self.n_actions))
61 | self.action_log_probs[self.step].copy_(action_log_probs)
62 | self.value_preds[self.step].copy_(value_preds)
63 | self.rewards[self.step].copy_(rewards)
64 | self.masks[self.step + 1].copy_(masks)
65 |
66 | self.step = (self.step + 1) % self.num_steps
67 |
68 | def after_update(self):
69 | self.obs[0].copy_(self.obs[-1])
70 | self.rec_states[0].copy_(self.rec_states[-1])
71 | self.masks[0].copy_(self.masks[-1])
72 | if self.has_extras:
73 | self.extras[0].copy_(self.extras[-1])
74 |
75 | def compute_returns(self, next_value, use_gae, gamma, tau):
76 | if use_gae:
77 | self.value_preds[-1] = next_value
78 | gae = 0
79 | for step in reversed(range(self.rewards.size(0))):
80 | delta = self.rewards[step] + gamma \
81 | * self.value_preds[step + 1] * self.masks[step + 1] \
82 | - self.value_preds[step]
83 | gae = delta + gamma * tau * self.masks[step + 1] * gae
84 | self.returns[step] = gae + self.value_preds[step]
85 | else:
86 | self.returns[-1] = next_value
87 | for step in reversed(range(self.rewards.size(0))):
88 | self.returns[step] = self.returns[step + 1] * gamma \
89 | * self.masks[step + 1] + self.rewards[step]
90 |
91 | def feed_forward_generator(self, advantages, num_mini_batch):
92 |
93 | num_steps, num_processes = self.rewards.size()[0:2]
94 | batch_size = num_processes * num_steps
95 | mini_batch_size = batch_size // num_mini_batch
96 | assert batch_size >= num_mini_batch, (
97 | "PPO requires the number of processes ({}) "
98 | "* number of steps ({}) = {} "
99 | "to be greater than or equal to "
100 | "the number of PPO mini batches ({})."
101 | "".format(num_processes, num_steps, num_processes * num_steps,
102 | num_mini_batch))
103 |
104 | sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
105 | mini_batch_size, drop_last=False)
106 |
107 | for indices in sampler:
108 | yield {
109 | 'obs': self.obs[:-1].view(-1, *self.obs.size()[2:])[indices],
110 | 'rec_states': self.rec_states[:-1].view(
111 | -1, self.rec_states.size(-1))[indices],
112 | 'actions': self.actions.view(-1, self.n_actions)[indices],
113 | 'value_preds': self.value_preds[:-1].view(-1)[indices],
114 | 'returns': self.returns[:-1].view(-1)[indices],
115 | 'masks': self.masks[:-1].view(-1)[indices],
116 | 'old_action_log_probs': self.action_log_probs.view(-1)[indices],
117 | 'adv_targ': advantages.view(-1)[indices],
118 | 'extras': self.extras[:-1].view(
119 | -1, self.extras_size)[indices]
120 | if self.has_extras else None,
121 | }
122 |
123 | def recurrent_generator(self, advantages, num_mini_batch):
124 |
125 | num_processes = self.rewards.size(1)
126 | assert num_processes >= num_mini_batch, (
127 | "PPO requires the number of processes ({}) "
128 | "to be greater than or equal to the number of "
129 | "PPO mini batches ({}).".format(num_processes, num_mini_batch))
130 | num_envs_per_batch = num_processes // num_mini_batch
131 | perm = torch.randperm(num_processes)
132 | T, N = self.num_steps, num_envs_per_batch
133 |
134 | for start_ind in range(0, num_processes, num_envs_per_batch):
135 |
136 | obs = []
137 | rec_states = []
138 | actions = []
139 | value_preds = []
140 | returns = []
141 | masks = []
142 | old_action_log_probs = []
143 | adv_targ = []
144 | if self.has_extras:
145 | extras = []
146 |
147 | for offset in range(num_envs_per_batch):
148 |
149 | ind = perm[start_ind + offset]
150 | obs.append(self.obs[:-1, ind])
151 | rec_states.append(self.rec_states[0:1, ind])
152 | actions.append(self.actions[:, ind])
153 | value_preds.append(self.value_preds[:-1, ind])
154 | returns.append(self.returns[:-1, ind])
155 | masks.append(self.masks[:-1, ind])
156 | old_action_log_probs.append(self.action_log_probs[:, ind])
157 | adv_targ.append(advantages[:, ind])
158 | if self.has_extras:
159 | extras.append(self.extras[:-1, ind])
160 |
161 | # These are all tensors of size (T, N, ...)
162 | obs = torch.stack(obs, 1)
163 | actions = torch.stack(actions, 1)
164 | value_preds = torch.stack(value_preds, 1)
165 | returns = torch.stack(returns, 1)
166 | masks = torch.stack(masks, 1)
167 | old_action_log_probs = torch.stack(old_action_log_probs, 1)
168 | adv_targ = torch.stack(adv_targ, 1)
169 | if self.has_extras:
170 | extras = torch.stack(extras, 1)
171 |
172 | yield {
173 | 'obs': _flatten_helper(T, N, obs),
174 | 'actions': _flatten_helper(T, N, actions),
175 | 'value_preds': _flatten_helper(T, N, value_preds),
176 | 'returns': _flatten_helper(T, N, returns),
177 | 'masks': _flatten_helper(T, N, masks),
178 | 'old_action_log_probs': _flatten_helper(
179 | T, N, old_action_log_probs),
180 | 'adv_targ': _flatten_helper(T, N, adv_targ),
181 | 'extras': _flatten_helper(
182 | T, N, extras) if self.has_extras else None,
183 | 'rec_states': torch.stack(rec_states, 1).view(N, -1),
184 | }
185 |
186 |
187 | class GlobalRolloutStorage(RolloutStorage):
188 |
189 | def __init__(self, num_steps, num_processes, obs_shape, action_space,
190 | rec_state_size, extras_size):
191 | super(GlobalRolloutStorage, self).__init__(
192 | num_steps, num_processes, obs_shape, action_space, rec_state_size)
193 | self.extras = torch.zeros((num_steps + 1, num_processes, extras_size),
194 | dtype=torch.long)
195 | self.has_extras = True
196 | self.extras_size = extras_size
197 |
198 | def insert(self, obs, rec_states, actions, action_log_probs, value_preds,
199 | rewards, masks, extras):
200 | self.extras[self.step + 1].copy_(extras)
201 | super(GlobalRolloutStorage, self).insert(
202 | obs, rec_states, actions,
203 | action_log_probs, value_preds, rewards, masks)
204 |
--------------------------------------------------------------------------------
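compute_returns implements, with use_gae, generalized advantage estimation: delta_t = r_t + gamma * V_{t+1} * m_{t+1} - V_t and A_t = delta_t + gamma * tau * m_{t+1} * A_{t+1}, where the masks m zero both bootstrapping and accumulation across episode boundaries; without use_gae it falls back to plain discounted returns. A minimal end-to-end sketch of the storage API; every shape below is invented, and the Discrete stand-in works only because the constructor inspects the class name rather than requiring gym:

import torch
from utils.storage import RolloutStorage

class Discrete:                              # stand-in for gym.spaces.Discrete
    def __init__(self, n):
        self.n = n

T, N, obs_shape = 4, 2, (3, 8, 8)
rollouts = RolloutStorage(T, N, obs_shape, Discrete(3), rec_state_size=1)
rollouts.obs[0].copy_(torch.rand(N, *obs_shape))

for _ in range(T):
    rollouts.insert(torch.rand(N, *obs_shape),    # next observation
                    torch.zeros(N, 1),            # recurrent states
                    torch.randint(0, 3, (N, 1)),  # actions
                    torch.zeros(N),               # action log-probs
                    torch.zeros(N),               # value predictions
                    torch.rand(N),                # rewards
                    torch.ones(N))                # masks: 0 where an episode ended

rollouts.compute_returns(next_value=torch.zeros(N),
                         use_gae=True, gamma=0.99, tau=0.95)
advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1]
for batch in rollouts.feed_forward_generator(advantages, num_mini_batch=2):
    print(batch['obs'].shape, batch['adv_targ'].shape)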